static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK); }
/* avoid HT sibilings if possible */ if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { mutex_unlock(&round_robin_lock); return; } for_each_cpu(cpu, tmp) { if (cpu_weight[cpu] < min_weight) { min_weight = cpu_weight[cpu]; preferred_cpu = cpu; } } if (tsk_in_cpu[tsk_index] != -1) cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = preferred_cpu; cpumask_set_cpu(preferred_cpu, pad_busy_cpus); cpu_weight[preferred_cpu]++; mutex_unlock(&round_robin_lock); set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); } static void exit_round_robin(unsigned int tsk_index) { struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits); cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = -1; } static unsigned int idle_pct = 5; /* percentage */ static unsigned int round_robin_time = 1; /* second */ static int power_saving_thread(void *data) { struct sched_param param = {.sched_priority = 1}; int do_sleep; unsigned int tsk_index = (unsigned long)data; u64 last_jiffies = 0; sched_setscheduler(current, SCHED_RR, ¶m); while (!kthread_should_stop()) { int cpu; unsigned long expire_time; try_to_freeze(); /* round robin to cpus */ expire_time = last_jiffies + round_robin_time * HZ; if (time_before(expire_time, jiffies)) { last_jiffies = jiffies; round_robin_cpu(tsk_index); } do_sleep = 0; expire_time = jiffies + HZ * (100 - idle_pct) / 100; while (!need_resched()) { if (tsc_detected_unstable && !tsc_marked_unstable) { /* TSC could halt in idle, so notify users */ mark_tsc_unstable("TSC halts in idle"); tsc_marked_unstable = 1; } if (lapic_detected_unstable && !lapic_marked_unstable) { int i; /* LAPIC could halt in idle, so notify users */ for_each_online_cpu(i) clockevents_notify( CLOCK_EVT_NOTIFY_BROADCAST_ON, &i); lapic_marked_unstable = 1; } local_irq_disable(); cpu = smp_processor_id(); if (lapic_marked_unstable) clockevents_notify( CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); stop_critical_timings(); mwait_idle_with_hints(power_saving_mwait_eax, 1); start_critical_timings(); if (lapic_marked_unstable) clockevents_notify( CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); local_irq_enable(); if (time_before(expire_time, jiffies)) { do_sleep = 1; break; } } /* * current sched_rt has threshold for rt task running time. * When a rt task uses 95% CPU time, the rt thread will be * scheduled out for 5% CPU time to not starve other tasks. But * the mechanism only works when all CPUs have RT task running, * as if one CPU hasn't RT task, RT task from other CPUs will * borrow CPU time from this CPU and cause RT task use > 95% * CPU time. To make 'avoid starvation' work, takes a nap here. */ if (unlikely(do_sleep)) schedule_timeout_killable(HZ * idle_pct / 100); /* If an external event has set the need_resched flag, then * we need to deal with it, or this loop will continue to * spin without calling __mwait(). */ if (unlikely(need_resched())) schedule(); } exit_round_robin(tsk_index); return 0; } static struct task_struct *ps_tsks[NR_CPUS]; static unsigned int ps_tsk_num; static int create_power_saving_task(void) { int rc; ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread, (void *)(unsigned long)ps_tsk_num, "acpi_pad/%d", ps_tsk_num); if (IS_ERR(ps_tsks[ps_tsk_num])) { rc = PTR_ERR(ps_tsks[ps_tsk_num]); ps_tsks[ps_tsk_num] = NULL; } else { rc = 0; ps_tsk_num++; } return rc; }
static int clamp_thread(void *arg) { int cpunr = (unsigned long)arg; DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0); static const struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; unsigned int count = 0; unsigned int target_ratio; set_bit(cpunr, cpu_clamping_mask); set_freezable(); init_timer_on_stack(&wakeup_timer); sched_setscheduler(current, SCHED_FIFO, ¶m); while (true == clamping && !kthread_should_stop() && cpu_online(cpunr)) { int sleeptime; unsigned long target_jiffies; unsigned int guard; unsigned int compensated_ratio; int interval; /* jiffies to sleep for each attempt */ unsigned int duration_jiffies = msecs_to_jiffies(duration); unsigned int window_size_now; try_to_freeze(); /* * make sure user selected ratio does not take effect until * the next round. adjust target_ratio if user has changed * target such that we can converge quickly. */ target_ratio = set_target_ratio; guard = 1 + target_ratio/20; window_size_now = window_size; count++; /* * systems may have different ability to enter package level * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ compensated_ratio = target_ratio + get_compensation(target_ratio); if (compensated_ratio <= 0) compensated_ratio = 1; interval = duration_jiffies * 100 / compensated_ratio; /* align idle time */ target_jiffies = roundup(jiffies, interval); sleeptime = target_jiffies - jiffies; if (sleeptime <= 0) sleeptime = 1; schedule_timeout_interruptible(sleeptime); /* * only elected controlling cpu can collect stats and update * control parameters. */ if (cpunr == control_cpu && !(count%window_size_now)) { should_skip = powerclamp_adjust_controls(target_ratio, guard, window_size_now); smp_mb(); } if (should_skip) continue; target_jiffies = jiffies + duration_jiffies; mod_timer(&wakeup_timer, target_jiffies); if (unlikely(local_softirq_pending())) continue; /* * stop tick sched during idle time, interrupts are still * allowed. thus jiffies are updated properly. */ preempt_disable(); /* mwait until target jiffies is reached */ while (time_before(jiffies, target_jiffies)) { unsigned long ecx = 1; unsigned long eax = target_mwait; /* * REVISIT: may call enter_idle() to notify drivers who * can save power during cpu idle. same for exit_idle() */ local_touch_nmi(); stop_critical_timings(); mwait_idle_with_hints(eax, ecx); start_critical_timings(); atomic_inc(&idle_wakeup_counter); } preempt_enable(); } del_timer_sync(&wakeup_timer); clear_bit(cpunr, cpu_clamping_mask); return 0; } /* * 1 HZ polling while clamping is active, useful for userspace * to monitor actual idle ratio. */ static void poll_pkg_cstate(struct work_struct *dummy); static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate); static void poll_pkg_cstate(struct work_struct *dummy) { static u64 msr_last; static u64 tsc_last; static unsigned long jiffies_last; u64 msr_now; unsigned long jiffies_now; u64 tsc_now; u64 val64; msr_now = pkg_state_counter(); tsc_now = rdtsc(); jiffies_now = jiffies; /* calculate pkg cstate vs tsc ratio */ if (!msr_last || !tsc_last) pkg_cstate_ratio_cur = 1; else { if (tsc_now - tsc_last) { val64 = 100 * (msr_now - msr_last); do_div(val64, (tsc_now - tsc_last)); pkg_cstate_ratio_cur = val64; } } /* update record */ msr_last = msr_now; jiffies_last = jiffies_now; tsc_last = tsc_now; if (true == clamping) schedule_delayed_work(&poll_pkg_cstate_work, HZ); } static int start_power_clamp(void) { unsigned long cpu; struct task_struct *thread; set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); /* prevent cpu hotplug */ get_online_cpus(); /* prefer BSP */ control_cpu = 0; if (!cpu_online(control_cpu)) control_cpu = smp_processor_id(); clamping = true; schedule_delayed_work(&poll_pkg_cstate_work, 0); /* start one thread per online cpu */ for_each_online_cpu(cpu) { struct task_struct **p = per_cpu_ptr(powerclamp_thread, cpu); thread = kthread_create_on_node(clamp_thread, (void *) cpu, cpu_to_node(cpu), "kidle_inject/%ld", cpu); /* bind to cpu here */ if (likely(!IS_ERR(thread))) { kthread_bind(thread, cpu); wake_up_process(thread); *p = thread; } } put_online_cpus(); return 0; }
/* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { local_irq_enable(); mwait_idle_with_hints(0, 0); }