void mwait_idle_with_hints(unsigned int eax, unsigned int ecx) { unsigned int cpu = smp_processor_id(); s_time_t expires = per_cpu(timer_deadline, cpu); if ( boot_cpu_has(X86_FEATURE_CLFLUSH_MONITOR) ) { mb(); clflush((void *)&mwait_wakeup(cpu)); mb(); } __monitor((void *)&mwait_wakeup(cpu), 0, 0); smp_mb(); /* * Timer deadline passing is the event on which we will be woken via * cpuidle_mwait_wakeup. So check it now that the location is armed. */ if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) ) { cpumask_set_cpu(cpu, &cpuidle_mwait_flags); __mwait(eax, ecx); cpumask_clear_cpu(cpu, &cpuidle_mwait_flags); } if ( expires <= NOW() && expires > 0 ) raise_softirq(TIMER_SOFTIRQ); }
/*
 * Take impl->lock, looping until the compare-and-swap 0 -> 1 succeeds.
 *
 * After every failed attempt the lock word is armed with __monitor();
 * __mwait() is then issued only if the lock word reads as zero.
 *
 * NOTE(review): waiting only when the lock already reads as free looks
 * inverted compared with the usual MONITOR/MWAIT pattern (wait while the
 * wake-up condition has NOT yet happened). Confirm against this project's
 * __mwait() semantics before changing it.
 */
static inline void aquire(struct liblock_impl* impl)
{
    for (;;) {
        /* The builtin returns the previous value: 0 means we now own it. */
        if (__sync_val_compare_and_swap(&impl->lock, 0, 1) == 0)
            return;

        __monitor(&impl->lock, 0, 0);
        if (impl->lock == 0)
            __mwait(&impl->lock, 0);
    }
}
/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 *
 * @eax, @ecx: hint values passed unchanged to __mwait().
 *
 * Nothing is done if a reschedule is already pending on entry.
 */
void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
	if (!need_resched()) {
		/* Arm the monitor on the thread flags word. */
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		/*
		 * Re-check after arming: a reschedule request that arrived
		 * before MONITOR took effect must not be slept through.
		 */
		if (!need_resched())
			__mwait(eax, ecx);
	}
}
static void acpi_dead_idle(void) { struct acpi_processor_power *power; struct acpi_processor_cx *cx; void *mwait_ptr; if ( (power = processor_powers[smp_processor_id()]) == NULL ) goto default_halt; if ( (cx = &power->states[power->count-1]) == NULL ) goto default_halt; mwait_ptr = (void *)&mwait_wakeup(smp_processor_id()); if ( cx->entry_method == ACPI_CSTATE_EM_FFH ) { /* * cache must be flashed as the last ops before cpu going into dead, * otherwise, cpu may dead with dirty data breaking cache coherency, * leading to strange errors. */ wbinvd(); while ( 1 ) { /* * 1. The CLFLUSH is a workaround for erratum AAI65 for * the Xeon 7400 series. * 2. The WBINVD is insufficient due to the spurious-wakeup * case where we return around the loop. * 3. Unlike wbinvd, clflush is a light weight but not serializing * instruction, hence memory fence is necessary to make sure all * load/store visible before flush cache line. */ mb(); clflush(mwait_ptr); __monitor(mwait_ptr, 0, 0); mb(); __mwait(cx->address, 0); } } default_halt: wbinvd(); for ( ; ; ) halt(); }
static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { unsigned int cpu = smp_processor_id(); s_time_t expires = per_cpu(timer_deadline, cpu); __monitor((void *)&mwait_wakeup(cpu), 0, 0); smp_mb(); /* * Timer deadline passing is the event on which we will be woken via * cpuidle_mwait_wakeup. So check it now that the location is armed. */ if ( expires > NOW() || expires == 0 ) { cpumask_set_cpu(cpu, &cpuidle_mwait_flags); __mwait(eax, ecx); cpumask_clear_cpu(cpu, &cpuidle_mwait_flags); } if ( expires <= NOW() && expires > 0 ) raise_softirq(TIMER_SOFTIRQ); }
/* avoid HT sibilings if possible */ if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { mutex_unlock(&round_robin_lock); return; } for_each_cpu(cpu, tmp) { if (cpu_weight[cpu] < min_weight) { min_weight = cpu_weight[cpu]; preferred_cpu = cpu; } } if (tsk_in_cpu[tsk_index] != -1) cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = preferred_cpu; cpumask_set_cpu(preferred_cpu, pad_busy_cpus); cpu_weight[preferred_cpu]++; mutex_unlock(&round_robin_lock); set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); } static void exit_round_robin(unsigned int tsk_index) { struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits); cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = -1; } static unsigned int idle_pct = 5; /* percentage */ static unsigned int round_robin_time = 1; /* second */ static int power_saving_thread(void *data) { struct sched_param param = {.sched_priority = 1}; int do_sleep; unsigned int tsk_index = (unsigned long)data; u64 last_jiffies = 0; sched_setscheduler(current, SCHED_RR, ¶m); set_freezable(); while (!kthread_should_stop()) { int cpu; u64 expire_time; try_to_freeze(); /* round robin to cpus */ if (last_jiffies + round_robin_time * HZ < jiffies) { last_jiffies = jiffies; round_robin_cpu(tsk_index); } do_sleep = 0; expire_time = jiffies + HZ * (100 - idle_pct) / 100; while (!need_resched()) { if (tsc_detected_unstable && !tsc_marked_unstable) { /* TSC could halt in idle, so notify users */ mark_tsc_unstable("TSC halts in idle"); tsc_marked_unstable = 1; } if (lapic_detected_unstable && !lapic_marked_unstable) { int i; /* LAPIC could halt in idle, so notify users */ for_each_online_cpu(i) clockevents_notify( CLOCK_EVT_NOTIFY_BROADCAST_ON, &i); lapic_marked_unstable = 1; } local_irq_disable(); cpu = smp_processor_id(); if (lapic_marked_unstable) clockevents_notify( CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); stop_critical_timings(); 
__monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __mwait(power_saving_mwait_eax, 1); start_critical_timings(); if (lapic_marked_unstable) clockevents_notify( CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); local_irq_enable(); if (jiffies > expire_time) { do_sleep = 1; break; } } /* * current sched_rt has threshold for rt task running time. * When a rt task uses 95% CPU time, the rt thread will be * scheduled out for 5% CPU time to not starve other tasks. But * the mechanism only works when all CPUs have RT task running, * as if one CPU hasn't RT task, RT task from other CPUs will * borrow CPU time from this CPU and cause RT task use > 95% * CPU time. To make 'avoid starvation' work, takes a nap here. */ if (do_sleep) schedule_timeout_killable(HZ * idle_pct / 100); } exit_round_robin(tsk_index); return 0; } static struct task_struct *ps_tsks[NR_CPUS]; static unsigned int ps_tsk_num; static int create_power_saving_task(void) { int rc = -ENOMEM; ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread, (void *)(unsigned long)ps_tsk_num, "acpi_pad/%d", ps_tsk_num); rc = PTR_RET(ps_tsks[ps_tsk_num]); if (!rc) ps_tsk_num++; else ps_tsks[ps_tsk_num] = NULL; return rc; }
static void acpi_dead_idle(void) { struct acpi_processor_power *power; struct acpi_processor_cx *cx; if ( (power = processor_powers[smp_processor_id()]) == NULL ) goto default_halt; if ( (cx = &power->states[power->count-1]) == NULL ) goto default_halt; if ( cx->entry_method == ACPI_CSTATE_EM_FFH ) { void *mwait_ptr = &mwait_wakeup(smp_processor_id()); /* * Cache must be flushed as the last operation before sleeping. * Otherwise, CPU may still hold dirty data, breaking cache coherency, * leading to strange errors. */ wbinvd(); while ( 1 ) { /* * 1. The CLFLUSH is a workaround for erratum AAI65 for * the Xeon 7400 series. * 2. The WBINVD is insufficient due to the spurious-wakeup * case where we return around the loop. * 3. Unlike wbinvd, clflush is a light weight but not serializing * instruction, hence memory fence is necessary to make sure all * load/store visible before flush cache line. */ mb(); clflush(mwait_ptr); __monitor(mwait_ptr, 0, 0); mb(); __mwait(cx->address, 0); } } else if ( current_cpu_data.x86_vendor == X86_VENDOR_AMD && cx->entry_method == ACPI_CSTATE_EM_SYSIO ) { /* Intel prefers not to use SYSIO */ /* Avoid references to shared data after the cache flush */ u32 address = cx->address; u32 pmtmr_ioport_local = pmtmr_ioport; wbinvd(); while ( 1 ) { inb(address); inl(pmtmr_ioport_local); } } default_halt: for ( ; ; ) halt(); }
/* avoid HT sibilings if possible */ if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { mutex_unlock(&isolated_cpus_lock); return; } for_each_cpu(cpu, tmp) { if (cpu_weight[cpu] < min_weight) { min_weight = cpu_weight[cpu]; preferred_cpu = cpu; } } if (tsk_in_cpu[tsk_index] != -1) cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = preferred_cpu; cpumask_set_cpu(preferred_cpu, pad_busy_cpus); cpu_weight[preferred_cpu]++; mutex_unlock(&isolated_cpus_lock); set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); } static void exit_round_robin(unsigned int tsk_index) { struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits); cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = -1; } static unsigned int idle_pct = 5; /* percentage */ static unsigned int round_robin_time = 10; /* second */ static int power_saving_thread(void *data) { struct sched_param param = {.sched_priority = 1}; int do_sleep; unsigned int tsk_index = (unsigned long)data; u64 last_jiffies = 0; sched_setscheduler(current, SCHED_RR, ¶m); while (!kthread_should_stop()) { int cpu; u64 expire_time; try_to_freeze(); /* round robin to cpus */ if (last_jiffies + round_robin_time * HZ < jiffies) { last_jiffies = jiffies; round_robin_cpu(tsk_index); } do_sleep = 0; current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we test * NEED_RESCHED: */ smp_mb(); expire_time = jiffies + HZ * (100 - idle_pct) / 100; while (!need_resched()) { local_irq_disable(); cpu = smp_processor_id(); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); stop_critical_timings(); __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __mwait(power_saving_mwait_eax, 1); start_critical_timings(); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); local_irq_enable(); if (jiffies > expire_time) { do_sleep = 1; break; } } 
current_thread_info()->status |= TS_POLLING; /* * current sched_rt has threshold for rt task running time. * When a rt task uses 95% CPU time, the rt thread will be * scheduled out for 5% CPU time to not starve other tasks. But * the mechanism only works when all CPUs have RT task running, * as if one CPU hasn't RT task, RT task from other CPUs will * borrow CPU time from this CPU and cause RT task use > 95% * CPU time. To make 'avoid starvation' work, takes a nap here. */ if (do_sleep) schedule_timeout_killable(HZ * idle_pct / 100); } exit_round_robin(tsk_index); return 0; } static struct task_struct *ps_tsks[NR_CPUS]; static unsigned int ps_tsk_num; static int create_power_saving_task(void) { int rc = -ENOMEM; ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread, (void *)(unsigned long)ps_tsk_num, "power_saving/%d", ps_tsk_num); rc = IS_ERR(ps_tsks[ps_tsk_num]) ? PTR_ERR(ps_tsks[ps_tsk_num]) : 0; if (!rc) ps_tsk_num++; else ps_tsks[ps_tsk_num] = NULL; return rc; }