/*
 * Power (C) state timer broadcast control.
 *
 * @pr:        processor whose power.states array contains @cx
 * @cx:        C-state being entered or left
 * @broadcast: non-zero to enter tick broadcast mode, zero to leave it
 *
 * C-states whose index within pr->power.states is below
 * pr->power.timer_broadcast_on_state are left alone.
 */
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
					struct acpi_processor_cx *cx,
					int broadcast)
{
	/* Index of @cx inside the processor's C-state table. */
	int cx_index = cx - pr->power.states;

	if (cx_index < pr->power.timer_broadcast_on_state)
		return;

	if (broadcast)
		tick_broadcast_enter();
	else
		tick_broadcast_exit();
}
/*
 * Suspend the calling CPU through tegra30_sleep_cpu_secondary_finish,
 * bracketed by tick broadcast enter/exit.
 *
 * @dev, @drv and @index are not used by this routine.
 *
 * Always returns true.
 */
static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev,
					struct cpuidle_driver *drv,
					int index)
{
	tick_broadcast_enter();

	/* Write barrier issued before handing control to cpu_suspend(). */
	smp_wmb();

	cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);

	tick_broadcast_exit();

	return true;
}
static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { /* All CPUs entering LP2 is not working. * Don't let CPU0 enter LP2 when any secondary CPU is online. */ if (num_online_cpus() > 1 || !tegra_cpu_rail_off_ready()) { cpu_do_idle(); return false; } tick_broadcast_enter(); tegra_idle_lp2_last(); tick_broadcast_exit(); return true; }
/* * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt * pending message MSR. If we detect C1E, then we handle it the same * way as C3 power states (local apic timer and TSC stop) */ static void amd_e400_idle(void) { if (!amd_e400_c1e_detected) { u32 lo, hi; rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); if (lo & K8_INTP_C1E_ACTIVE_MASK) { amd_e400_c1e_detected = true; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); pr_info("System has AMD C1E enabled\n"); } } if (amd_e400_c1e_detected) { int cpu = smp_processor_id(); if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { cpumask_set_cpu(cpu, amd_e400_c1e_mask); /* Force broadcast so ACPI can not interfere. */ tick_broadcast_force(); pr_info("Switch to broadcast mode on CPU%d\n", cpu); } tick_broadcast_enter(); default_idle(); /* * The switch back from broadcast mode needs to be * called with interrupts disabled. */ local_irq_disable(); tick_broadcast_exit(); local_irq_enable(); } else default_idle(); }
/*
 * AMD Erratum 400 aware idle routine. Affected CPUs are handled the same
 * way as C3 power states (local apic timer and TSC stop).
 */
static void amd_e400_idle(void)
{
	/*
	 * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
	 * gets set after static_cpu_has() places have been converted via
	 * alternatives.
	 */
	if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
		tick_broadcast_enter();

		default_idle();

		/*
		 * The switch back from broadcast mode needs to be called with
		 * interrupts disabled.
		 */
		local_irq_disable();
		tick_broadcast_exit();
		local_irq_enable();
	} else {
		/* CPU does not carry the bug flag: plain idle is enough. */
		default_idle();
	}
}
/* avoid HT sibilings if possible */ if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { mutex_unlock(&round_robin_lock); return; } for_each_cpu(cpu, tmp) { if (cpu_weight[cpu] < min_weight) { min_weight = cpu_weight[cpu]; preferred_cpu = cpu; } } if (tsk_in_cpu[tsk_index] != -1) cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = preferred_cpu; cpumask_set_cpu(preferred_cpu, pad_busy_cpus); cpu_weight[preferred_cpu]++; mutex_unlock(&round_robin_lock); set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); } static void exit_round_robin(unsigned int tsk_index) { struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits); cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = -1; } static unsigned int idle_pct = 5; /* percentage */ static unsigned int round_robin_time = 1; /* second */ static int power_saving_thread(void *data) { struct sched_param param = {.sched_priority = 1}; int do_sleep; unsigned int tsk_index = (unsigned long)data; u64 last_jiffies = 0; sched_setscheduler(current, SCHED_RR, ¶m); while (!kthread_should_stop()) { unsigned long expire_time; try_to_freeze(); /* round robin to cpus */ expire_time = last_jiffies + round_robin_time * HZ; if (time_before(expire_time, jiffies)) { last_jiffies = jiffies; round_robin_cpu(tsk_index); } do_sleep = 0; expire_time = jiffies + HZ * (100 - idle_pct) / 100; while (!need_resched()) { if (tsc_detected_unstable && !tsc_marked_unstable) { /* TSC could halt in idle, so notify users */ mark_tsc_unstable("TSC halts in idle"); tsc_marked_unstable = 1; } local_irq_disable(); tick_broadcast_enable(); tick_broadcast_enter(); stop_critical_timings(); mwait_idle_with_hints(power_saving_mwait_eax, 1); start_critical_timings(); tick_broadcast_exit(); local_irq_enable(); if (time_before(expire_time, jiffies)) { do_sleep = 1; break; } } /* * current sched_rt has threshold for rt task running time. 
* When a rt task uses 95% CPU time, the rt thread will be * scheduled out for 5% CPU time to not starve other tasks. But * the mechanism only works when all CPUs have RT task running, * as if one CPU hasn't RT task, RT task from other CPUs will * borrow CPU time from this CPU and cause RT task use > 95% * CPU time. To make 'avoid starvation' work, takes a nap here. */ if (unlikely(do_sleep)) schedule_timeout_killable(HZ * idle_pct / 100); /* If an external event has set the need_resched flag, then * we need to deal with it, or this loop will continue to * spin without calling __mwait(). */ if (unlikely(need_resched())) schedule(); } exit_round_robin(tsk_index); return 0; } static struct task_struct *ps_tsks[NR_CPUS]; static unsigned int ps_tsk_num; static int create_power_saving_task(void) { int rc; ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread, (void *)(unsigned long)ps_tsk_num, "acpi_pad/%d", ps_tsk_num); if (IS_ERR(ps_tsks[ps_tsk_num])) { rc = PTR_ERR(ps_tsks[ps_tsk_num]); ps_tsks[ps_tsk_num] = NULL; } else { rc = 0; ps_tsk_num++; } return rc; }
/** * cpuidle_idle_call - the main idle function * * NOTE: no locks or semaphores should be used here * * On archs that support TIF_POLLING_NRFLAG, is called with polling * set, and it returns with polling set. If it ever stops polling, it * must clear the polling bit. */ static void cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; unsigned int broadcast; bool reflect; /* * Check if the idle task must be rescheduled. If it is the * case, exit the function after re-enabling the local irq. */ if (need_resched()) { local_irq_enable(); return; } /* * During the idle period, stop measuring the disabled irqs * critical sections latencies */ stop_critical_timings(); /* * Tell the RCU framework we are entering an idle section, * so no more rcu read side critical sections and one more * step to the grace period */ rcu_idle_enter(); if (cpuidle_not_available(drv, dev)) goto use_default; /* * Suspend-to-idle ("freeze") is a system state in which all user space * has been frozen, all I/O devices have been suspended and the only * activity happens here and in iterrupts (if any). In that case bypass * the cpuidle governor and go stratight for the deepest idle state * available. Possibly also suspend the local tick and the entire * timekeeping to prevent timer interrupts from kicking us out of idle * until a proper wakeup interrupt happens. */ if (idle_should_freeze()) { entered_state = cpuidle_enter_freeze(drv, dev); if (entered_state >= 0) { local_irq_enable(); goto exit_idle; } reflect = false; next_state = cpuidle_find_deepest_state(drv, dev); } else { reflect = true; /* * Ask the cpuidle framework to choose a convenient idle state. */ next_state = cpuidle_select(drv, dev); } /* Fall back to the default arch idle method on errors. 
*/ if (next_state < 0) goto use_default; /* * The idle task must be scheduled, it is pointless to * go to idle, just update no idle residency and get * out of this function */ if (current_clr_polling_and_test()) { dev->last_residency = 0; entered_state = next_state; local_irq_enable(); goto exit_idle; } broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP; /* * Tell the time framework to switch to a broadcast timer * because our local timer will be shutdown. If a local timer * is used from another cpu as a broadcast timer, this call may * fail if it is not available */ if (broadcast && tick_broadcast_enter()) goto use_default; /* Take note of the planned idle state. */ idle_set_state(this_rq(), &drv->states[next_state]); /* * Enter the idle state previously returned by the governor decision. * This function will block until an interrupt occurs and will take * care of re-enabling the local interrupts */ entered_state = cpuidle_enter(drv, dev, next_state); /* The cpu is no longer idle or about to enter idle. */ idle_set_state(this_rq(), NULL); if (broadcast) tick_broadcast_exit(); /* * Give the governor an opportunity to reflect on the outcome */ if (reflect) cpuidle_reflect(dev, entered_state); exit_idle: __current_set_polling(); /* * It is up to the idle functions to reenable local interrupts */ if (WARN_ON_ONCE(irqs_disabled())) local_irq_enable(); rcu_idle_exit(); start_critical_timings(); return; use_default: /* * We can't use the cpuidle framework, let's use the default * idle routine. */ if (current_clr_polling_and_test()) local_irq_enable(); else arch_cpu_idle(); goto exit_idle; }