Beispiel #1
0
/**
 * release_console_sem - unlock the console system
 *
 * Releases the semaphore which the caller holds on the console system
 * and the console driver list.
 *
 * While the semaphore was held, console output may have been buffered
 * by printk().  If this is the case, release_console_sem() emits
 * the output prior to releasing the semaphore.
 *
 * If there is output waiting for klogd, we wake it up.
 *
 * If console_suspended is set, only secondary_console_sem is released
 * and nothing is flushed.
 *
 * release_console_sem() may be called from any context.
 */
void release_console_sem(void)
{
	unsigned long flags;
	unsigned _con_start, _log_end;
	unsigned wake_klogd = 0;

	if (console_suspended) {
		up(&secondary_console_sem);
		return;
	}

	console_may_schedule = 0;

	/*
	 * Flush in batches: snapshot the pending range under logbuf_lock,
	 * then call the console drivers without the lock.  Repeat until
	 * con_start has caught up with log_end.
	 */
	for ( ; ; ) {
		spin_lock_irqsave(&logbuf_lock, flags);
		/* Becomes non-zero as soon as log_start and log_end differ */
		wake_klogd |= log_start - log_end;
		if (con_start == log_end)
			break;			/* Nothing to print */
		_con_start = con_start;
		_log_end = log_end;
		con_start = log_end;		/* Flush */
		/*
		 * Only the lock is dropped here; the saved irq state is put
		 * back by local_irq_restore() after the drivers have run.
		 */
		spin_unlock(&logbuf_lock);
		stop_critical_timings();	/* don't trace print latency */
		call_console_drivers(_con_start, _log_end);
		start_critical_timings();
		local_irq_restore(flags);
	}
	/*
	 * The loop is only left through the break above, so logbuf_lock is
	 * still held while the console semaphore is released.
	 */
	console_locked = 0;
	up(&console_sem);
	spin_unlock_irqrestore(&logbuf_lock, flags);
	if (wake_klogd)
		wake_up_klogd();
}
Beispiel #2
0
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 *
 * The outer loop has no exit, so this function does not return.
 */
void cpu_idle(void)
{
	int cpu = smp_processor_id();

	/* Mark this thread as polling in its thread_info status word */
	current_thread_info()->status |= TS_POLLING;

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		/* Stay in the idle routine until a reschedule is requested */
		while (!need_resched()) {

			check_pgt_cache();
			rmb();

			if (rcu_pending(cpu))
				rcu_check_callbacks(cpu, 0);

			if (cpu_is_offline(cpu))
				play_dead();

			/*
			 * pm_idle() is entered with interrupts disabled.
			 * NOTE(review): nothing in this loop re-enables them,
			 * so pm_idle() is presumably expected to - confirm.
			 */
			local_irq_disable();
			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
		}
		/* A reschedule is pending: restart the tick and run the scheduler */
		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}
Beispiel #3
0
/*
 * The body of the idle task.
 *
 * If the platform supplies ppc_md.idle_loop it is used instead of the
 * generic loop below.  Otherwise the CPU idles through ppc_md.power_save
 * when available, or by lowering the hardware thread priority.
 */
void cpu_idle(void)
{
	if (ppc_md.idle_loop)
		ppc_md.idle_loop();	/* doesn't return */

	set_thread_flag(TIF_POLLING_NRFLAG);
	while (1) {
		tick_nohz_idle_enter();
		rcu_idle_enter();

		/* Idle until there is work to do or this CPU has to go away */
		while (!need_resched() && !cpu_should_die()) {
			ppc64_runlatch_off();

			if (ppc_md.power_save) {
				clear_thread_flag(TIF_POLLING_NRFLAG);
				/*
				 * smp_mb is so clearing of TIF_POLLING_NRFLAG
				 * is ordered w.r.t. need_resched() test.
				 */
				smp_mb();
				local_irq_disable();

				/* Don't trace irqs off for idle */
				stop_critical_timings();

				/* check again after disabling irqs */
				if (!need_resched() && !cpu_should_die())
					ppc_md.power_save();

				start_critical_timings();

				/* Some power_save functions return with
				 * interrupts enabled, some don't.
				 */
				if (irqs_disabled())
					local_irq_enable();
				set_thread_flag(TIF_POLLING_NRFLAG);

			} else {
				/*
				 * Go into low thread priority and possibly
				 * low power mode.
				 */
				HMT_low();
				HMT_very_low();
			}
		}

		/* Leaving idle: undo the idle-entry steps in reverse order */
		HMT_medium();
		ppc64_runlatch_on();
		rcu_idle_exit();
		tick_nohz_idle_exit();
		if (cpu_should_die()) {
			sched_preempt_enable_no_resched();
			cpu_die();
		}
		schedule_preempt_disabled();
	}
}
Beispiel #4
0
/**
 * default_idle_call - Default CPU idle routine.
 *
 * To use when the cpuidle framework cannot be used.
 *
 * Clears the polling state and re-tests for pending work.  When nothing
 * is pending the architecture idle routine runs with critical-section
 * timing suspended; otherwise local interrupts are simply re-enabled.
 */
void default_idle_call(void)
{
	if (!current_clr_polling_and_test()) {
		/* Nothing to run: idle without tracing the irqs-off period */
		stop_critical_timings();
		arch_cpu_idle();
		start_critical_timings();
		return;
	}

	/* Work showed up in the meantime, do not enter idle at all */
	local_irq_enable();
}
Beispiel #5
0
/*
 * Generic idle loop implementation
 *
 * Never returns.  Each pass of the outer loop idles until need_resched()
 * becomes true and then calls into the scheduler.
 */
static void cpu_idle_loop(void)
{
	while (1) {
		tick_nohz_idle_enter();

		while (!need_resched()) {
			check_pgt_cache();
			rmb();

			if (cpu_is_offline(smp_processor_id()))
				arch_cpu_idle_dead();

			/* Interrupts are off from here until the idle routine runs */
			local_irq_disable();
			arch_cpu_idle_enter();

			/*
			 * In poll mode we reenable interrupts and spin.
			 *
			 * Also if we detected in the wakeup from idle
			 * path that the tick broadcast device expired
			 * for us, we don't want to go deep idle as we
			 * know that the IPI is going to arrive right
			 * away
			 */
			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
				cpu_idle_poll();
			} else {
				/*
				 * Stop polling and re-check for work.  Only
				 * idle when nothing is pending; otherwise just
				 * turn interrupts back on.
				 */
				if (!current_clr_polling_and_test()) {
					stop_critical_timings();
					rcu_idle_enter();
					arch_cpu_idle();
					/* arch_cpu_idle() must return with irqs enabled */
					WARN_ON_ONCE(irqs_disabled());
					rcu_idle_exit();
					start_critical_timings();
				} else {
					local_irq_enable();
				}
				/* Polling state is restored on both paths */
				__current_set_polling();
			}
			arch_cpu_idle_exit();
		}

		/*
		 * Since we fell out of the loop above, we know
		 * TIF_NEED_RESCHED must be set, propagate it into
		 * PREEMPT_NEED_RESCHED.
		 *
		 * This is required because for polling idle loops we will
		 * not have had an IPI to fold the state for us.
		 */
		preempt_set_need_resched();
		tick_nohz_idle_exit();
		schedule_preempt_disabled();
	}
}
Beispiel #6
0
/*
 * Busy-wait idle: spin with interrupts enabled until a reschedule is
 * requested or polling is no longer called for.
 *
 * Returns 1 unconditionally.
 */
static inline int cpu_idle_poll(void)
{
	rcu_idle_enter();
	trace_cpu_idle_rcuidle(0, smp_processor_id());
	local_irq_enable();
	stop_critical_timings();

	for (;;) {
		/* Somebody wants the CPU: stop spinning */
		if (tif_need_resched())
			break;
		/* Neither forced polling nor an expired broadcast tick */
		if (!cpu_idle_force_poll && !tick_check_broadcast_expired())
			break;
		cpu_relax();
	}

	start_critical_timings();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
	rcu_idle_exit();

	return 1;
}
Beispiel #7
0
/*
 * The body of the idle task.
 *
 * Uses the platform's own idle loop when ppc_md.idle_loop is set.
 * Otherwise idles through ppc_md.power_save when available, or by
 * lowering the hardware thread priority.
 */
void cpu_idle(void)
{
	/* NOTE(review): the platform idle loop presumably never returns - confirm */
	if (ppc_md.idle_loop)
		ppc_md.idle_loop();	

	set_thread_flag(TIF_POLLING_NRFLAG);
	while (1) {
		tick_nohz_idle_enter();
		rcu_idle_enter();

		/* Idle until there is work to do or this CPU has to go away */
		while (!need_resched() && !cpu_should_die()) {
			ppc64_runlatch_off();

			if (ppc_md.power_save) {
				clear_thread_flag(TIF_POLLING_NRFLAG);
				/*
				 * Barrier between clearing the polling flag
				 * and the need_resched() test further down.
				 */
				smp_mb();
				local_irq_disable();

				/* Don't trace irqs off for idle */
				stop_critical_timings();

				/* Check again now that interrupts are disabled */
				if (!need_resched() && !cpu_should_die())
					ppc_md.power_save();

				start_critical_timings();

				/* power_save() may or may not have re-enabled irqs */
				if (irqs_disabled())
					local_irq_enable();
				set_thread_flag(TIF_POLLING_NRFLAG);

			} else {
				/* No power_save hook: drop the thread priority instead */
				HMT_low();
				HMT_very_low();
			}
		}

		/* Leaving idle: undo the idle-entry steps in reverse order */
		HMT_medium();
		ppc64_runlatch_on();
		rcu_idle_exit();
		tick_nohz_idle_exit();
		if (cpu_should_die()) {
			sched_preempt_enable_no_resched();
			cpu_die();
		}
		schedule_preempt_disabled();
	}
}
/**
 * acpi_idle_do_entry - enter the C-state described by @cx
 * @cx: cstate data
 *
 * Dispatches on cx->entry_method: FFH entry, a safe halt, or otherwise a
 * read of the I/O port at cx->address followed by a read of the PM timer
 * block address.  Critical-section timing is suspended around the entry.
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
	/* Don't trace irqs off for idle */
	stop_critical_timings();

	switch (cx->entry_method) {
	case ACPI_CSTATE_FFH:
		/* Architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cx);
		break;
	case ACPI_CSTATE_HALT:
		acpi_safe_halt();
		break;
	default:
		/* I/O port based C-state, followed by a dummy timer read */
		inb(cx->address);
		inl(acpi_gbl_FADT.xpm_timer_block.address);
		break;
	}

	start_critical_timings();
}
Beispiel #9
0
/*
 * Generic idle loop implementation
 *
 * Never returns.  In this variant the offline check runs after
 * schedule_preempt_disabled() returns rather than inside the inner loop,
 * and a per-cpu idle_force_poll flag can also force polling.
 */
static void cpu_idle_loop(void)
{
	while (1) {
		tick_nohz_idle_enter();

		while (!need_resched()) {
			check_pgt_cache();
			rmb();

			/* Interrupts are off from here until the idle routine runs */
			local_irq_disable();
			arch_cpu_idle_enter();

			/*
			 * In poll mode we reenable interrupts and spin.
			 *
			 * Also if we detected in the wakeup from idle
			 * path that the tick broadcast device expired
			 * for us, we don't want to go deep idle as we
			 * know that the IPI is going to arrive right
			 * away
			 */
			if (cpu_idle_force_poll ||
			    tick_check_broadcast_expired() ||
			    __get_cpu_var(idle_force_poll)) {
				cpu_idle_poll();
			} else {
				/*
				 * Stop polling and re-check for work.  Only
				 * idle when nothing is pending; otherwise just
				 * turn interrupts back on.
				 */
				if (!current_clr_polling_and_test()) {
					stop_critical_timings();
					rcu_idle_enter();
					arch_cpu_idle();
					/* arch_cpu_idle() must return with irqs enabled */
					WARN_ON_ONCE(irqs_disabled());
					rcu_idle_exit();
					start_critical_timings();
				} else {
					local_irq_enable();
				}
				/* Polling state is restored on both paths */
				__current_set_polling();
			}
			arch_cpu_idle_exit();
		}
		tick_nohz_idle_exit();
		schedule_preempt_disabled();
		/* Back in the idle task: take the CPU down if it went offline */
		if (cpu_is_offline(smp_processor_id()))
			arch_cpu_idle_dead();

	}
}
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 *
 * The outer loop has no exit, so this function does not return.
 */
void cpu_idle(void)
{
	int cpu = smp_processor_id();

	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us.  CPU0 already has it initialized but no harm in
	 * doing it again.  This is a good place for updating it, as
	 * we wont ever return from this function (so the invalid
	 * canaries already on the stack wont ever trigger).
	 */
	boot_init_stack_canary();

	/* Mark this thread as polling in its thread_info status word */
	current_thread_info()->status |= TS_POLLING;

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		/* Stay in the idle routine until a reschedule is requested */
		while (!need_resched()) {

			check_pgt_cache();
			rmb();

			if (cpu_is_offline(cpu))
				play_dead();

			/*
			 * pm_idle() is entered with interrupts disabled.
			 * NOTE(review): nothing in this loop re-enables them,
			 * so pm_idle() is presumably expected to - confirm.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();

			/*
			 * In many cases the interrupt that ended idle
			 * has already called exit_idle. But some idle
			 * loops can be woken up without interrupt.
			 */
			__exit_idle();
		}
		/* A reschedule is pending: restart the tick and run the scheduler */
		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}
Beispiel #11
0
/*
 * Default idle routine.
 *
 * With a non-zero hlt_counter the CPU only executes a single cpu_relax()
 * with interrupts disabled.  Otherwise it stops polling and calls
 * cpu_sleep() until a reschedule is requested.
 */
void default_idle(void)
{
	if (likely(hlt_counter)) {
		/* Sleeping is not allowed: relax once and return */
		local_irq_disable();
		stop_critical_timings();
		cpu_relax();
		start_critical_timings();
		local_irq_enable();
		return;
	}

	/* Leave polling mode before sleeping, re-enter it afterwards */
	clear_thread_flag(TIF_POLLING_NRFLAG);
	smp_mb__after_clear_bit();

	local_irq_disable();
	for (;;) {
		if (need_resched())
			break;
		cpu_sleep();
	}
	local_irq_enable();

	set_thread_flag(TIF_POLLING_NRFLAG);
}
Beispiel #12
0
/*
 * Default idle routine.
 *
 * While hlt_counter is non-zero the CPU just spins on cpu_relax() until a
 * reschedule is requested.  Otherwise it leaves polling mode and sleeps
 * via cpu_sleep(), bracketed by set_bl_bit()/clear_bl_bit().
 */
static void default_idle(void)
{
	if (hlt_counter) {
		/* Sleeping is not allowed: busy-wait instead */
		while (!need_resched())
			cpu_relax();
		return;
	}

	clear_thread_flag(TIF_POLLING_NRFLAG);
	smp_mb__after_clear_bit();
	set_bl_bit();
	/* Don't trace the sleep as an irqs-off critical section */
	stop_critical_timings();

	for (;;) {
		if (need_resched())
			break;
		cpu_sleep();
	}

	start_critical_timings();
	clear_bl_bit();
	set_thread_flag(TIF_POLLING_NRFLAG);
}
/*
 * The idle thread, has rather strange semantics for calling pm_idle,
 * but this is what x86 does and we need to do the same, so that
 * things like cpuidle get called in the same way.  The only difference
 * is that we always respect 'hlt_counter' to prevent low power idle.
 *
 * The outer loop has no exit, so this function does not return.
 */
void cpu_idle(void)
{
	local_fiq_enable();

	/* endless idle loop with no priority at all */
	while (1) {
		idle_notifier_call_chain(IDLE_START);
		tick_nohz_idle_enter();
		rcu_idle_enter();
		while (!need_resched()) {
			/*
			 * We need to disable interrupts here
			 * to ensure we don't miss a wakeup call.
			 */
			local_irq_disable();
#ifdef CONFIG_PL310_ERRATA_769419
			wmb();
#endif
			if (hlt_counter) {
				/* Low power idle is blocked: just relax */
				local_irq_enable();
				cpu_relax();
			} else if (!need_resched()) {
				/*
				 * Re-checked with irqs off.  pm_idle() is only
				 * the fallback for a non-zero return from
				 * cpuidle_idle_call().
				 */
				stop_critical_timings();
				if (cpuidle_idle_call())
					pm_idle();
				start_critical_timings();
				/*
				 * pm_idle functions must always
				 * return with IRQs enabled.
				 */
				WARN_ON(irqs_disabled());
			} else
				local_irq_enable();
		}
		/* Leaving idle: undo the idle-entry steps in reverse order */
		rcu_idle_exit();
		tick_nohz_idle_exit();
		idle_notifier_call_chain(IDLE_END);
		schedule_preempt_disabled();
#ifdef CONFIG_HOTPLUG_CPU
		/* Back in the idle task: take the CPU down if it went offline */
		if (cpu_is_offline(smp_processor_id()))
			cpu_die();
#endif
	}
}
/**
 * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
 * @cx: cstate data
 *
 * Picks the entry mechanism from cx->entry_method and suspends
 * critical-section timing for the duration of the idle period.
 *
 * Caller disables interrupt before call and enables interrupt after return.
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
	/* Don't trace irqs off for idle */
	stop_critical_timings();

	switch (cx->entry_method) {
	case ACPI_CSTATE_FFH:
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cx);
		break;

	case ACPI_CSTATE_HALT:
		acpi_safe_halt();
		break;

	default:
		/* IO port based C-state */
		inb(cx->address);
		/*
		 * Dummy wait op - must do something useless after P_LVL2
		 * read because chipsets cannot guarantee that STPCLK# signal
		 * gets asserted in time to freeze execution properly.
		 */
		inl(acpi_gbl_FADT.xpm_timer_block.address);
		break;
	}

	start_critical_timings();
}
Beispiel #15
0
/*
 * The idle thread. There's no useful work to be done, so just try to conserve
 * power and have a low exit latency (ie sit in a loop waiting for somebody to
 * say that they'd like to reschedule)
 *
 * The outer loop has no exit, so this function does not return.
 */
void cpu_idle(void)
{
	unsigned int cpu = smp_processor_id();

	set_thread_flag(TIF_POLLING_NRFLAG);

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_idle_enter();
		rcu_idle_enter();

		while (!need_resched()) {
			check_pgt_cache();
			rmb();

			if (cpu_is_offline(cpu))
				play_dead();

			local_irq_disable();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			/* sh_idle() is the fallback for a non-zero cpuidle return */
			if (cpuidle_idle_call())
				sh_idle();
			/*
			 * Sanity check to ensure that sh_idle() returns
			 * with IRQs enabled
			 */
			WARN_ON(irqs_disabled());
			start_critical_timings();
		}

		/* Leaving idle: undo the idle-entry steps in reverse order */
		rcu_idle_exit();
		tick_nohz_idle_exit();
		schedule_preempt_disabled();
	}
}
Beispiel #16
0
/*
 * The idle thread. There's no useful work to be done, so just try to conserve
 * power and have a low exit latency (ie sit in a loop waiting for somebody to
 * say that they'd like to reschedule)
 *
 * Declared __noreturn: the outer loop has no exit.
 */
void __noreturn cpu_idle(void)
{
	int cpu;

	/* CPU is going idle. */
	cpu = smp_processor_id();

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_idle_enter();
		rcu_idle_enter();
		/* Idle only while there is no work and the CPU is still online */
		while (!need_resched() && cpu_online(cpu)) {
#ifdef CONFIG_MIPS_MT_SMTC
			extern void smtc_idle_loop_hook(void);

			smtc_idle_loop_hook();
#endif

			/* Without a cpu_wait handler this loop simply spins */
			if (cpu_wait) {
				/* Don't trace irqs off for idle */
				stop_critical_timings();
				(*cpu_wait)();
				start_critical_timings();
			}
		}
#ifdef CONFIG_HOTPLUG_CPU
		/*
		 * Only play dead when the CPU is offline, is not set in
		 * cpu_callin_map, and the system is booting or running.
		 */
		if (!cpu_online(cpu) && !cpu_isset(cpu, cpu_callin_map) &&
		    (system_state == SYSTEM_RUNNING ||
		     system_state == SYSTEM_BOOTING))
			play_dead();
#endif
		rcu_idle_exit();
		tick_nohz_idle_exit();
		schedule_preempt_disabled();
	}
}
	/* avoid HT siblings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&round_robin_lock);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}

	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&round_robin_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */

/**
 * power_saving_thread - kthread body that keeps a CPU in mwait idle
 * @data: index of this task, passed as an integer cast to a pointer
 *
 * Runs as a SCHED_RR task until kthread_should_stop().  Every
 * round_robin_time seconds it asks round_robin_cpu() to pick a CPU, then
 * idles with monitor/mwait for (100 - idle_pct)% of a second and sleeps
 * for the remaining idle_pct%.
 *
 * All jiffies comparisons use time_before()/time_after() so that they
 * stay correct when jiffies wraps.
 *
 * Return: always 0.
 */
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	/*
	 * Start one full period (plus a tick) in the past so that the very
	 * first pass picks a CPU, whatever the current value of jiffies is.
	 */
	unsigned long last_jiffies = jiffies - round_robin_time * HZ - 1;

	sched_setscheduler(current, SCHED_RR, &param);
	set_freezable();

	while (!kthread_should_stop()) {
		int cpu;
		unsigned long expire_time;

		try_to_freeze();

		/* round robin to cpus */
		expire_time = last_jiffies + round_robin_time * HZ;
		if (time_before(expire_time, jiffies)) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			if (lapic_detected_unstable && !lapic_marked_unstable) {
				int i;
				/* LAPIC could halt in idle, so notify users */
				for_each_online_cpu(i)
					clockevents_notify(
						CLOCK_EVT_NOTIFY_BROADCAST_ON,
						&i);
				lapic_marked_unstable = 1;
			}
			local_irq_disable();
			cpu = smp_processor_id();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
			stop_critical_timings();

			/*
			 * Arm the monitor on the thread flags word, then
			 * re-check need_resched() before actually waiting.
			 */
			__monitor((void *)&current_thread_info()->flags, 0, 0);
			smp_mb();
			if (!need_resched())
				__mwait(power_saving_mwait_eax, 1);

			start_critical_timings();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
			local_irq_enable();

			if (time_after(jiffies, expire_time)) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * current sched_rt has threshold for rt task running time.
		 * When a rt task uses 95% CPU time, the rt thread will be
		 * scheduled out for 5% CPU time to not starve other tasks. But
		 * the mechanism only works when all CPUs have RT task running,
		 * as if one CPU hasn't RT task, RT task from other CPUs will
		 * borrow CPU time from this CPU and cause RT task use > 95%
		 * CPU time. To make 'avoid starvation' work, takes a nap here.
		 */
		if (do_sleep)
			schedule_timeout_killable(HZ * idle_pct / 100);

		/*
		 * If something else set need_resched, the inner loop exits at
		 * once without sleeping.  Yield here, otherwise the outer loop
		 * would spin without ever reaching __mwait().
		 */
		if (unlikely(need_resched()))
			schedule();
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;

/*
 * Start one more power saving kthread, named "acpi_pad/<index>", in the
 * next free ps_tsks slot.
 *
 * Returns 0 and bumps ps_tsk_num on success.  On failure the slot is
 * reset to NULL and the error from kthread_run() is returned.
 */
static int create_power_saving_task(void)
{
	struct task_struct **slot = &ps_tsks[ps_tsk_num];
	int err;

	*slot = kthread_run(power_saving_thread,
		(void *)(unsigned long)ps_tsk_num,
		"acpi_pad/%d", ps_tsk_num);

	err = PTR_RET(*slot);
	if (err) {
		*slot = NULL;
		return err;
	}

	ps_tsk_num++;
	return 0;
}
Beispiel #18
0
/**
 * cpuidle_idle_call - the main idle function
 *
 * Selects an idle state through the cpuidle governor and enters it,
 * switching to the broadcast timer first when the state is flagged
 * CPUIDLE_FLAG_TIMER_STOP.  Falls back to arch_cpu_idle() when no state
 * can be selected or the broadcast switch fails.
 *
 * NOTE: no locks or semaphores should be used here
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;
	bool broadcast;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the disabled irqs
	 * critical sections latencies
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section,
	 * so no more rcu read side critical sections and one more
	 * step to the grace period
	 */
	rcu_idle_enter();

	/*
	 * Ask the cpuidle framework to choose a convenient idle state.
	 * Fall back to the default arch idle method on errors.
	 */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
use_default:
		/*
		 * We can't use the cpuidle framework, let's use the default
		 * idle routine.
		 *
		 * This label is also reached by goto when switching to the
		 * broadcast timer fails further down.
		 */
		if (current_clr_polling_and_test())
			local_irq_enable();
		else
			arch_cpu_idle();

		goto exit_idle;
	}


	/*
	 * The idle task must be scheduled, it is pointless to
	 * go to idle, just update no idle residency and get
	 * out of this function
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	/* Does the chosen state stop the local timer? */
	broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);

	/*
	 * Tell the time framework to switch to a broadcast timer
	 * because our local timer will be shutdown. If a local timer
	 * is used from another cpu as a broadcast timer, this call may
	 * fail if it is not available
	 */
	if (broadcast &&
	    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
		goto use_default;

	trace_cpu_idle_rcuidle(next_state, dev->cpu);

	/*
	 * Enter the idle state previously returned by the governor decision.
	 * This function will block until an interrupt occurs and will take
	 * care of re-enabling the local interrupts
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);

	if (broadcast)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	cpuidle_reflect(dev, entered_state);

exit_idle:
	/* Common exit: restore polling and undo the idle-entry steps */
	__current_set_polling();

	/*
	 * It is up to the idle functions to reenable local interrupts
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
}
/**
 * cpuidle_idle_call - the main idle function
 *
 * Selects an idle state through the cpuidle governor, records it on the
 * runqueue with idle_set_state() and enters it.  Falls back to
 * default_idle_call() when no state can be selected or when
 * cpuidle_enter() returns -EBUSY.
 *
 * NOTE: no locks or semaphores should be used here
 *
 * On archs that support TIF_POLLING_NRFLAG, is called with polling
 * set, and it returns with polling set.  If it ever stops polling, it
 * must clear the polling bit.
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the disabled irqs
	 * critical sections latencies
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section,
	 * so no more rcu read side critical sections and one more
	 * step to the grace period
	 */
	rcu_idle_enter();

	/*
	 * Check if the cpuidle framework is ready, otherwise fallback
	 * to the default arch specific idle method
	 */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
		default_idle_call();
		goto exit_idle;
	}

	/*
	 * The idle task must be scheduled, it is pointless to
	 * go to idle, just update no idle residency and get
	 * out of this function
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	/* Take note of the planned idle state. */
	idle_set_state(this_rq(), &drv->states[next_state]);

	/*
	 * Enter the idle state previously returned by the governor decision.
	 * This function will block until an interrupt occurs and will take
	 * care of re-enabling the local interrupts
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* The cpu is no longer idle or about to enter idle. */
	idle_set_state(this_rq(), NULL);

	/* The state could not be entered: use the default idle routine */
	if (entered_state == -EBUSY) {
		default_idle_call();
		goto exit_idle;
	}

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	cpuidle_reflect(dev, entered_state);

exit_idle:
	/* Common exit: restore polling and undo the idle-entry steps */
	__current_set_polling();

	/*
	 * It is up to the idle functions to reenable local interrupts
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
}
Beispiel #20
0
/*
 * clamp_thread - per-cpu idle injection kthread
 * @arg: number of the CPU this thread serves, cast to a pointer
 *
 * Runs as a SCHED_FIFO task while clamping is active, the thread has not
 * been asked to stop and the CPU is online.  Each round it sleeps until
 * the next aligned interval, lets the controlling CPU update the control
 * parameters, and then forces the CPU into mwait for duration_jiffies.
 *
 * Returns 0 once the loop ends.
 */
static int clamp_thread(void *arg)
{
	int cpunr = (unsigned long)arg;
	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
	static const struct sched_param param = {
		.sched_priority = MAX_USER_RT_PRIO/2,
	};
	unsigned int count = 0;
	unsigned int target_ratio;

	set_bit(cpunr, cpu_clamping_mask);
	set_freezable();
	init_timer_on_stack(&wakeup_timer);
	sched_setscheduler(current, SCHED_FIFO, &param);

	while (true == clamping && !kthread_should_stop() &&
		cpu_online(cpunr)) {
		int sleeptime;
		unsigned long target_jiffies;
		unsigned int guard;
		/*
		 * Signed on purpose: the compensation may be negative, and an
		 * unsigned sum would wrap to a huge value, slip past the
		 * "<= 0" check below and turn interval into 0, which
		 * roundup() then divides by.
		 */
		int compensated_ratio;
		int interval; /* jiffies to sleep for each attempt */
		unsigned int duration_jiffies = msecs_to_jiffies(duration);
		unsigned int window_size_now;

		try_to_freeze();
		/*
		 * make sure user selected ratio does not take effect until
		 * the next round. adjust target_ratio if user has changed
		 * target such that we can converge quickly.
		 */
		target_ratio = set_target_ratio;
		guard = 1 + target_ratio/20;
		window_size_now = window_size;
		count++;

		/*
		 * systems may have different ability to enter package level
		 * c-states, thus we need to compensate the injected idle ratio
		 * to achieve the actual target reported by the HW.
		 */
		compensated_ratio = (int)target_ratio +
			get_compensation(target_ratio);
		if (compensated_ratio <= 0)
			compensated_ratio = 1;
		interval = duration_jiffies * 100 / compensated_ratio;

		/* align idle time */
		target_jiffies = roundup(jiffies, interval);
		sleeptime = target_jiffies - jiffies;
		if (sleeptime <= 0)
			sleeptime = 1;
		schedule_timeout_interruptible(sleeptime);
		/*
		 * only elected controlling cpu can collect stats and update
		 * control parameters.
		 */
		if (cpunr == control_cpu && !(count%window_size_now)) {
			should_skip =
				powerclamp_adjust_controls(target_ratio,
							guard, window_size_now);
			smp_mb();
		}

		if (should_skip)
			continue;

		target_jiffies = jiffies + duration_jiffies;
		mod_timer(&wakeup_timer, target_jiffies);
		if (unlikely(local_softirq_pending()))
			continue;
		/*
		 * stop tick sched during idle time, interrupts are still
		 * allowed. thus jiffies are updated properly.
		 */
		preempt_disable();
		/* mwait until target jiffies is reached */
		while (time_before(jiffies, target_jiffies)) {
			unsigned long ecx = 1;
			unsigned long eax = target_mwait;

			/*
			 * REVISIT: may call enter_idle() to notify drivers who
			 * can save power during cpu idle. same for exit_idle()
			 */
			local_touch_nmi();
			stop_critical_timings();
			mwait_idle_with_hints(eax, ecx);
			start_critical_timings();
			atomic_inc(&idle_wakeup_counter);
		}
		preempt_enable();
	}
	del_timer_sync(&wakeup_timer);
	clear_bit(cpunr, cpu_clamping_mask);

	return 0;
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 *
 * Publishes the package C-state residency as a percentage of elapsed TSC
 * cycles in pkg_cstate_ratio_cur and re-arms itself one second later for
 * as long as clamping stays on.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;
	static unsigned long jiffies_last;

	u64 msr_now = pkg_state_counter();
	u64 tsc_now = rdtsc();
	unsigned long jiffies_now = jiffies;
	u64 ratio;

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last) {
		/* No previous sample yet */
		pkg_cstate_ratio_cur = 1;
	} else if (tsc_now - tsc_last) {
		ratio = 100 * (msr_now - msr_last);
		do_div(ratio, (tsc_now - tsc_last));
		pkg_cstate_ratio_cur = ratio;
	}

	/* update record */
	msr_last = msr_now;
	jiffies_last = jiffies_now;
	tsc_last = tsc_now;

	if (clamping == true)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

/*
 * Turn clamping on: pick the controlling CPU, kick off the package
 * C-state polling work and start one bound "kidle_inject/<cpu>" thread per
 * online CPU.  A CPU whose thread cannot be created is skipped.
 *
 * Always returns 0.
 */
static int start_power_clamp(void)
{
	unsigned long cpu;
	struct task_struct *thread;

	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);

	/* prevent cpu hotplug */
	get_online_cpus();

	/* prefer BSP */
	control_cpu = 0;
	if (!cpu_online(control_cpu))
		control_cpu = smp_processor_id();

	clamping = true;
	schedule_delayed_work(&poll_pkg_cstate_work, 0);

	/* start one thread per online cpu */
	for_each_online_cpu(cpu) {
		struct task_struct **slot =
			per_cpu_ptr(powerclamp_thread, cpu);

		thread = kthread_create_on_node(clamp_thread,
						(void *) cpu,
						cpu_to_node(cpu),
						"kidle_inject/%ld", cpu);
		if (unlikely(IS_ERR(thread)))
			continue;

		/* bind to cpu here */
		kthread_bind(thread, cpu);
		wake_up_process(thread);
		*slot = thread;
	}

	put_online_cpus();

	return 0;
}
Beispiel #21
0
	/* avoid HT siblings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&round_robin_lock);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}

	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&round_robin_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */

/*
 * power_saving_thread - kthread body that keeps a CPU in mwait idle
 * @data: index of this task, passed as an integer cast to a pointer
 *
 * Runs as a SCHED_RR task until kthread_should_stop().  Every
 * round_robin_time seconds it asks round_robin_cpu() to pick a CPU, then
 * idles via mwait for (100 - idle_pct)% of a second and sleeps for the
 * remaining idle_pct%.
 *
 * Returns 0 when the thread is stopped.
 */
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	u64 last_jiffies = 0;

	sched_setscheduler(current, SCHED_RR, &param);

	while (!kthread_should_stop()) {
		unsigned long expire_time;

		try_to_freeze();

		/* round robin to cpus */
		expire_time = last_jiffies + round_robin_time * HZ;
		if (time_before(expire_time, jiffies)) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		/* End of the idle injection window for this round */
		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			/*
			 * Interrupts stay disabled across the broadcast
			 * enter/exit pair and the mwait in between.
			 */
			local_irq_disable();
			tick_broadcast_enable();
			tick_broadcast_enter();
			stop_critical_timings();

			mwait_idle_with_hints(power_saving_mwait_eax, 1);

			start_critical_timings();
			tick_broadcast_exit();
			local_irq_enable();

			if (time_before(expire_time, jiffies)) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * current sched_rt has threshold for rt task running time.
		 * When a rt task uses 95% CPU time, the rt thread will be
		 * scheduled out for 5% CPU time to not starve other tasks. But
		 * the mechanism only works when all CPUs have RT task running,
		 * as if one CPU hasn't RT task, RT task from other CPUs will
		 * borrow CPU time from this CPU and cause RT task use > 95%
		 * CPU time. To make 'avoid starvation' work, takes a nap here.
		 */
		if (unlikely(do_sleep))
			schedule_timeout_killable(HZ * idle_pct / 100);

		/* If an external event has set the need_resched flag, then
		 * we need to deal with it, or this loop will continue to
		 * spin without calling __mwait().
		 */
		if (unlikely(need_resched()))
			schedule();
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;

/*
 * Start one more power saving kthread, named "acpi_pad/<index>", and
 * record it in the next free ps_tsks slot.
 *
 * Returns 0 and bumps ps_tsk_num on success.  On failure the slot is set
 * to NULL and the error carried by the kthread_run() result is returned.
 */
static int create_power_saving_task(void)
{
	struct task_struct *tsk;

	tsk = kthread_run(power_saving_thread,
		(void *)(unsigned long)ps_tsk_num,
		"acpi_pad/%d", ps_tsk_num);

	if (IS_ERR(tsk)) {
		ps_tsks[ps_tsk_num] = NULL;
		return PTR_ERR(tsk);
	}

	ps_tsks[ps_tsk_num] = tsk;
	ps_tsk_num++;

	return 0;
}
Beispiel #22
0
/**
 * cpuidle_idle_call - the main idle function
 *
 * Picks an idle state (through the cpuidle governor, or the deepest
 * available state when suspend-to-idle is requested), enters it and
 * lets the governor reflect on the result.  When the cpuidle framework
 * cannot be used, or state selection / broadcast setup fails, the
 * default arch_cpu_idle() routine is used instead.
 *
 * Every path out of this function either re-enables local interrupts
 * itself or relies on the idle routine it called to have done so; the
 * common exit path warns once and enables them if that did not happen.
 *
 * NOTE: no locks or semaphores should be used here
 *
 * On archs that support TIF_POLLING_NRFLAG, is called with polling
 * set, and it returns with polling set.  If it ever stops polling, it
 * must clear the polling bit.
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;
	/* Non-zero when the chosen state is flagged CPUIDLE_FLAG_TIMER_STOP. */
	unsigned int broadcast;
	/* Whether the governor gets to see the outcome via cpuidle_reflect(). */
	bool reflect;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 * Nothing has been set up yet at this point, so a plain return
	 * (instead of going through exit_idle) is enough.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the disabled irqs
	 * critical sections latencies.  Paired with the
	 * start_critical_timings() call on the exit_idle path.
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section,
	 * so no more rcu read side critical sections and one more
	 * step to the grace period.  Paired with rcu_idle_exit() on the
	 * exit_idle path.
	 */
	rcu_idle_enter();

	if (cpuidle_not_available(drv, dev))
		goto use_default;

	/*
	 * Suspend-to-idle ("freeze") is a system state in which all user space
	 * has been frozen, all I/O devices have been suspended and the only
	 * activity happens here and in interrupts (if any).  In that case bypass
	 * the cpuidle governor and go straight for the deepest idle state
	 * available.  Possibly also suspend the local tick and the entire
	 * timekeeping to prevent timer interrupts from kicking us out of idle
	 * until a proper wakeup interrupt happens.
	 */
	if (idle_should_freeze()) {
		entered_state = cpuidle_enter_freeze(drv, dev);
		/*
		 * A non-negative return is treated as "state entered":
		 * re-enable interrupts and finish without consulting or
		 * updating the governor.
		 */
		if (entered_state >= 0) {
			local_irq_enable();
			goto exit_idle;
		}

		/*
		 * Otherwise enter the deepest state through the regular
		 * path below, but keep the result away from the governor.
		 */
		reflect = false;
		next_state = cpuidle_find_deepest_state(drv, dev);
	} else {
		reflect = true;
		/*
		 * Ask the cpuidle framework to choose a convenient idle state.
		 */
		next_state = cpuidle_select(drv, dev);
	}
	/* Fall back to the default arch idle method on errors. */
	if (next_state < 0)
		goto use_default;

	/*
	 * The idle task must be scheduled, it is pointless to
	 * go to idle, just update no idle residency and get
	 * out of this function
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP;

	/*
	 * Tell the time framework to switch to a broadcast timer
	 * because our local timer will be shutdown. If a local timer
	 * is used from another cpu as a broadcast timer, this call may
	 * fail if it is not available
	 */
	if (broadcast && tick_broadcast_enter())
		goto use_default;

	/* Take note of the planned idle state. */
	idle_set_state(this_rq(), &drv->states[next_state]);

	/*
	 * Enter the idle state previously returned by the governor decision.
	 * This function will block until an interrupt occurs and will take
	 * care of re-enabling the local interrupts
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* The cpu is no longer idle or about to enter idle. */
	idle_set_state(this_rq(), NULL);

	/* Undo the tick_broadcast_enter() done before entering the state. */
	if (broadcast)
		tick_broadcast_exit();

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	if (reflect)
		cpuidle_reflect(dev, entered_state);

	/*
	 * Common exit: restore the polling bit and undo the RCU and
	 * critical-timings setup done at the top of the function.
	 */
exit_idle:
	__current_set_polling();

	/*
	 * It is up to the idle functions to reenable local interrupts
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
	return;

	/*
	 * Only reachable through goto: cpuidle not available, a negative
	 * next_state, or a failed tick_broadcast_enter().
	 */
use_default:
	/*
	 * We can't use the cpuidle framework, let's use the default
	 * idle routine.
	 */
	if (current_clr_polling_and_test())
		local_irq_enable();
	else
		arch_cpu_idle();

	goto exit_idle;
}
	/* avoid HT sibilings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&isolated_cpus_lock);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}

	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&isolated_cpus_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

/*
 * exit_round_robin - give up the CPU claimed by a power saving task
 * @tsk_index: index of the task's slot in tsk_in_cpu[]
 *
 * Clears the task's CPU from the pad_busy_cpus mask and resets the slot
 * to -1.  A slot that is already -1 has no CPU recorded (the
 * round-robin step can return before assigning one), so it is left
 * alone: handing -1 to cpumask_clear_cpu() would clear a bit far outside
 * the mask.
 */
static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);

	if (tsk_in_cpu[tsk_index] == -1)
		return;

	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 10; /* second */

/*
 * power_saving_thread - kthread body that keeps a CPU in mwait idle
 * @data: task index, passed as an integer cast to a pointer
 *
 * Runs as a SCHED_RR priority-1 task until kthread_should_stop().  Each
 * outer iteration:
 *   - re-picks a CPU through round_robin_cpu() once more than
 *     round_robin_time seconds have passed since the last pick;
 *   - idles with monitor/mwait for up to (100 - idle_pct)% of a second,
 *     or until need_resched() becomes true;
 *   - sleeps for idle_pct% of a second when the idle budget was used up;
 *   - calls schedule() when a reschedule is pending.
 *
 * All time comparisons use time_before() so they stay correct when the
 * jiffies counter wraps.
 *
 * Returns 0 after releasing the task's CPU with exit_round_robin().
 */
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	/*
	 * Start just over one round-robin period in the past so that the
	 * first loop iteration always picks a CPU, wherever the jiffies
	 * counter currently stands.
	 */
	unsigned long last_jiffies = jiffies - round_robin_time * HZ - 1;

	sched_setscheduler(current, SCHED_RR, &param);

	while (!kthread_should_stop()) {
		int cpu;
		unsigned long expire_time;

		try_to_freeze();

		/* round robin to cpus */
		if (time_before(last_jiffies + round_robin_time * HZ,
				jiffies)) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we test
		 * NEED_RESCHED:
		 */
		smp_mb();

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			local_irq_disable();
			cpu = smp_processor_id();
			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
				&cpu);
			stop_critical_timings();

			/*
			 * Arm the monitor on the thread flags word, then
			 * re-check need_resched() before the mwait.
			 */
			__monitor((void *)&current_thread_info()->flags, 0, 0);
			smp_mb();
			if (!need_resched())
				__mwait(power_saving_mwait_eax, 1);

			start_critical_timings();
			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
				&cpu);
			local_irq_enable();

			/* Idle budget for this round is used up. */
			if (time_before(expire_time, jiffies)) {
				do_sleep = 1;
				break;
			}
		}

		current_thread_info()->status |= TS_POLLING;

		/*
		 * current sched_rt has threshold for rt task running time.
		 * When a rt task uses 95% CPU time, the rt thread will be
		 * scheduled out for 5% CPU time to not starve other tasks. But
		 * the mechanism only works when all CPUs have RT task running,
		 * as if one CPU hasn't RT task, RT task from other CPUs will
		 * borrow CPU time from this CPU and cause RT task use > 95%
		 * CPU time. To make 'avoid starvation' work, takes a nap here.
		 */
		if (do_sleep)
			schedule_timeout_killable(HZ * idle_pct / 100);

		/*
		 * If an external event has set the need_resched flag, then
		 * we need to deal with it, or this loop will continue to
		 * spin without calling __mwait().
		 */
		if (unlikely(need_resched()))
			schedule();
	}

	exit_round_robin(tsk_index);
	return 0;
}

/*
 * Table of power saving kthreads and the number of entries in use.
 * create_power_saving_task() writes slot ps_tsk_num and bumps the count
 * only after the thread has been started.
 */
static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;

/*
 * create_power_saving_task - spawn the next power saving kthread
 *
 * Starts power_saving_thread() with the current ps_tsk_num as its task
 * index; the thread is named "power_saving/<index>".
 *
 * Returns 0 and increments ps_tsk_num on success.  If kthread_run()
 * hands back an error pointer, the slot is cleared to NULL and the
 * corresponding error code is returned.
 */
static int create_power_saving_task(void)
{
	int err;

	ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
					  (void *)(unsigned long)ps_tsk_num,
					  "power_saving/%d", ps_tsk_num);

	if (IS_ERR(ps_tsks[ps_tsk_num])) {
		/* Extract the error before wiping the slot. */
		err = PTR_ERR(ps_tsks[ps_tsk_num]);
		ps_tsks[ps_tsk_num] = NULL;
		return err;
	}

	ps_tsk_num++;
	return 0;
}