/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void __cpuinit check_tsc_sync_source(int cpu)
{
	unsigned long flags;
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized:
	 */
	if (unsynchronized_tsc())
		return;

	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
		smp_processor_id(), cpu);

	/*
	 * Reset it - in case this is a second bootup:
	 */
	atomic_set(&stop_count, 0);

	/*
	 * Wait for the target to arrive:
	 */
	local_save_flags(flags);
	local_irq_enable();
	while (atomic_read(&start_count) != cpus-1)
		cpu_relax();
	local_irq_restore(flags);

	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp();

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);

	if (nr_warps) {
		printk("\n");
		printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
				    " turning off TSC clock.\n", max_warp);
		mark_tsc_unstable();
		nr_warps = 0;
		max_warp = 0;
		last_tsc = 0;
	} else {
		printk(" passed.\n");
	}

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);
}
/* Hook from generic ACPI tables.c */
static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	if (!strncmp(oem_id, "IBM", 3) &&
	    (!strncmp(oem_table_id, "SERVIGIL", 8) ||
	     !strncmp(oem_table_id, "EXA", 3))) {
		mark_tsc_unstable("Summit based system");
		use_cyclone = 1; /* enable cyclone-timer */
		setup_summit();
		return 1;
	}

	return 0;
}
static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
				char *productid)
{
	if (!strncmp(oem, "IBM ENSW", 8) &&
	    (!strncmp(productid, "VIGIL SMP", 9) ||
	     !strncmp(productid, "EXA", 3) ||
	     !strncmp(productid, "RUTHLESS SMP", 12))) {
		mark_tsc_unstable("Summit based system");
		use_cyclone = 1; /* enable cyclone-timer */
		setup_summit();
		return 1;
	}

	return 0;
}
static void __init ms_hyperv_init_platform(void)
{
	/*
	 * Extract the features and hints
	 */
	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
	ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

	pr_info("HyperV: features 0x%x, hints 0x%x\n",
		ms_hyperv.features, ms_hyperv.hints);

#ifdef CONFIG_X86_LOCAL_APIC
	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
		/*
		 * Get the APIC frequency.
		 */
		u64 hv_lapic_frequency;

		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
		lapic_timer_frequency = hv_lapic_frequency;
		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
			lapic_timer_frequency);
	}

	register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
			     "hv_nmi_unknown");
#endif

	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
		clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
	machine_ops.shutdown = hv_machine_shutdown;
	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
	mark_tsc_unstable("running on Hyper-V");

	/*
	 * Generation 2 instances don't support reading the NMI status from
	 * 0x61 port.
	 */
	if (efi_enabled(EFI_BOOT))
		x86_platform.get_nmi_reason = hv_get_nmi_reason;
}
static void power_saving_mwait_init(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (!boot_cpu_has(X86_FEATURE_MWAIT))
		return;
	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	power_saving_mwait_eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);

	for_each_online_cpu(i)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &i);

#if defined(CONFIG_GENERIC_TIME) && defined(CONFIG_X86)
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;

		/*FALL THROUGH*/
	default:
		/* TSC could halt in idle, so notify users */
		mark_tsc_unstable("TSC halts in idle");
	}
#endif
}
static void tsc_check_state(int state)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;
	default:
		if (state > ACPI_STATE_C1)
			mark_tsc_unstable("TSC halts in idle");
	}
}
static void tsc_check_state(int state)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;

		/*FALL THROUGH*/
	default:
		/* TSC could halt in idle, so notify users */
		if (state > ACPI_STATE_C1)
			mark_tsc_unstable("TSC halts in idle");
	}
}
static int __init kretprobe_init(void)
{
	int ret;

	my_kretprobe.kp.symbol_name = func_name;

	ret = register_kretprobe(&my_kretprobe);
	if (ret < 0) {
		printk(KERN_INFO "register_kretprobe failed, returned %d\n",
				ret);
		return -1;
	}
	mark_tsc_unstable("kernel uptime bug");
	printk(KERN_INFO "Planted return probe at %s: %p\n",
			my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
	return 0;
}
static void __init ms_hyperv_init_platform(void)
{
	/*
	 * Extract the features and hints
	 */
	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
	ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

	pr_info("HyperV: features 0x%x, hints 0x%x\n",
		ms_hyperv.features, ms_hyperv.hints);

#ifdef CONFIG_X86_LOCAL_APIC
	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
		/*
		 * Get the APIC frequency.
		 */
		u64 hv_lapic_frequency;

		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
		lapic_timer_frequency = hv_lapic_frequency;
		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
			lapic_timer_frequency);
	}
#endif

	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
		clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
	machine_ops.shutdown = hv_machine_shutdown;
	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
	mark_tsc_unstable("running on Hyper-V");
}
void __init arch_post_acpi_subsys_init(void)
{
	u32 lo, hi;

	if (!boot_cpu_has_bug(X86_BUG_AMD_E400))
		return;

	/*
	 * AMD E400 detection needs to happen after ACPI has been enabled. If
	 * the machine is affected K8_INTP_C1E_ACTIVE_MASK bits are set in
	 * MSR_K8_INT_PENDING_MSG.
	 */
	rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
	if (!(lo & K8_INTP_C1E_ACTIVE_MASK))
		return;

	boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E);

	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halt in AMD C1E");
	pr_info("System has AMD C1E enabled\n");
}
/*
 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
 * pending message MSR. If we detect C1E, then we handle it the same
 * way as C3 power states (local apic timer and TSC stop)
 */
static void amd_e400_idle(void)
{
	if (!amd_e400_c1e_detected) {
		u32 lo, hi;

		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);

		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
			amd_e400_c1e_detected = true;
			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
				mark_tsc_unstable("TSC halt in AMD C1E");
			pr_info("System has AMD C1E enabled\n");
		}
	}

	if (amd_e400_c1e_detected) {
		int cpu = smp_processor_id();

		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
			/* Force broadcast so ACPI can not interfere. */
			tick_broadcast_force();
			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
		}
		tick_broadcast_enter();

		default_idle();

		/*
		 * The switch back from broadcast mode needs to be
		 * called with interrupts disabled.
		 */
		local_irq_disable();
		tick_broadcast_exit();
		local_irq_enable();
	} else
		default_idle();
}
/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void __cpuinit check_tsc_sync_source(int cpu)
{
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized:
	 */
	if (unsynchronized_tsc())
		return;

	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		printk(KERN_INFO
		       "Skipping synchronization checks as TSC is reliable.\n");
		return;
	}

	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
		smp_processor_id(), cpu);

	/*
	 * Reset it - in case this is a second bootup:
	 */
	atomic_set(&stop_count, 0);

	/*
	 * Wait for the target to arrive:
	 */
	while (atomic_read(&start_count) != cpus-1)
		cpu_relax();
	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp();

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	if (nr_warps) {
		printk("\n");
		printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
				    " turning off TSC clock.\n", max_warp);
		mark_tsc_unstable("check_tsc_sync_source failed");
	} else {
		printk(" passed.\n");
	}

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);
	nr_warps = 0;
	max_warp = 0;
	last_tsc = 0;

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);
}
/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @state: the state data
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
			      struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	u32 t1, t2;
	int sleep_ticks = 0;

	pr = __get_cpu_var(processors);

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	if (acpi_idle_bm_check()) {
		if (dev->safe_state) {
			dev->last_state = dev->safe_state;
			return dev->safe_state->enter(dev, dev->safe_state);
		} else {
			local_irq_disable();
			acpi_safe_halt();
			local_irq_enable();
			return 0;
		}
	}

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	acpi_unlazy_tlb(smp_processor_id());

	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	acpi_state_timer_broadcast(pr, cx, 1);

	/*
	 * disable bus master
	 * bm_check implies we need ARB_DIS
	 * !bm_check implies we need cache flush
	 * bm_control implies whether we can do ARB_DIS
	 *
	 * That leaves a case where bm_check is set and bm_control is
	 * not set. In that case we cannot do much, we enter C3
	 * without doing anything.
	 */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		spin_lock(&c3_lock);
		c3_cpu_count++;
		/* Disable bus master arbitration when all CPUs are in C3 */
		if (c3_cpu_count == num_online_cpus())
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
		spin_unlock(&c3_lock);
	} else if (!pr->flags.bm_check) {
		ACPI_FLUSH_CPU_CACHE();
	}

	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
	acpi_idle_do_entry(cx);
	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

	/* Re-enable bus master arbitration */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		spin_lock(&c3_lock);
		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		c3_cpu_count--;
		spin_unlock(&c3_lock);
	}

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
	/* TSC could halt in idle, so notify users */
	if (tsc_halts_in_c(ACPI_STATE_C3))
		mark_tsc_unstable("TSC halts in idle");
#endif
	sleep_ticks = ticks_elapsed(t1, t2);
	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += sleep_ticks;
	return ticks_elapsed_in_us(t1, t2);
}
/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @state: the state data
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
				  struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	u32 t1, t2;
	int sleep_ticks = 0;

	pr = __get_cpu_var(processors);

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	acpi_state_timer_broadcast(pr, cx, 1);

	if (cx->type == ACPI_STATE_C3)
		ACPI_FLUSH_CPU_CACHE();

	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	acpi_idle_do_entry(cx);
	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
	/* TSC could halt in idle, so notify users */
	if (tsc_halts_in_c(cx->type))
		mark_tsc_unstable("TSC halts in idle");
#endif
	sleep_ticks = ticks_elapsed(t1, t2);

	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += sleep_ticks;
	return ticks_elapsed_in_us(t1, t2);
}
	/* avoid HT siblings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&round_robin_lock);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}
	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&round_robin_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	u64 last_jiffies = 0;

	sched_setscheduler(current, SCHED_RR, &param);

	while (!kthread_should_stop()) {
		unsigned long expire_time;

		try_to_freeze();

		/* round robin to cpus */
		expire_time = last_jiffies + round_robin_time * HZ;
		if (time_before(expire_time, jiffies)) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			local_irq_disable();
			tick_broadcast_enable();
			tick_broadcast_enter();
			stop_critical_timings();

			mwait_idle_with_hints(power_saving_mwait_eax, 1);

			start_critical_timings();
			tick_broadcast_exit();
			local_irq_enable();

			if (time_before(expire_time, jiffies)) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * current sched_rt has threshold for rt task running time.
		 * When a rt task uses 95% CPU time, the rt thread will be
		 * scheduled out for 5% CPU time to not starve other tasks. But
		 * the mechanism only works when all CPUs have RT task running,
		 * as if one CPU hasn't RT task, RT task from other CPUs will
		 * borrow CPU time from this CPU and cause RT task use > 95%
		 * CPU time. To make 'avoid starvation' work, takes a nap here.
		 */
		if (unlikely(do_sleep))
			schedule_timeout_killable(HZ * idle_pct / 100);

		/*
		 * If an external event has set the need_resched flag, then
		 * we need to deal with it, or this loop will continue to
		 * spin without calling __mwait().
		 */
		if (unlikely(need_resched()))
			schedule();
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;
static int create_power_saving_task(void)
{
	int rc;

	ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
			(void *)(unsigned long)ps_tsk_num,
			"acpi_pad/%d", ps_tsk_num);

	if (IS_ERR(ps_tsks[ps_tsk_num])) {
		rc = PTR_ERR(ps_tsks[ps_tsk_num]);
		ps_tsks[ps_tsk_num] = NULL;
	} else {
		rc = 0;
		ps_tsk_num++;
	}

	return rc;
}
/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void check_tsc_sync_source(int cpu)
{
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized or if we have no TSC.
	 */
	if (unsynchronized_tsc())
		return;

	/*
	 * Set the maximum number of test runs to
	 *  1 if the CPU does not provide the TSC_ADJUST MSR
	 *  3 if the MSR is available, so the target can try to adjust
	 */
	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		atomic_set(&test_runs, 1);
	else
		atomic_set(&test_runs, 3);
retry:
	/*
	 * Wait for the target to start or to skip the test:
	 */
	while (atomic_read(&start_count) != cpus - 1) {
		if (atomic_read(&skip_test) > 0) {
			atomic_set(&skip_test, 0);
			return;
		}
		cpu_relax();
	}

	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp(loop_timeout(cpu));

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	/*
	 * If the test was successful set the number of runs to zero and
	 * stop. If not, decrement the number of runs and check if we can
	 * retry. In case of random warps no retry is attempted.
	 */
	if (!nr_warps) {
		atomic_set(&test_runs, 0);

		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
			smp_processor_id(), cpu);

	} else if (atomic_dec_and_test(&test_runs) || random_warps) {
		/* Force it to 0 if random warps brought us here */
		atomic_set(&test_runs, 0);

		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
			smp_processor_id(), cpu);
		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
			   "turning off TSC clock.\n", max_warp);
		if (random_warps)
			pr_warning("TSC warped randomly between CPUs\n");
		mark_tsc_unstable("check_tsc_sync_source failed");
	}

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);
	random_warps = 0;
	nr_warps = 0;
	max_warp = 0;
	last_tsc = 0;

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);

	/*
	 * Retry, if there is a chance to do so.
	 */
	if (atomic_read(&test_runs) > 0)
		goto retry;
}
	/* avoid HT siblings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&round_robin_lock);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}
	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&round_robin_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	u64 last_jiffies = 0;

	sched_setscheduler(current, SCHED_RR, &param);
	set_freezable();

	while (!kthread_should_stop()) {
		int cpu;
		u64 expire_time;

		try_to_freeze();

		/* round robin to cpus */
		if (last_jiffies + round_robin_time * HZ < jiffies) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			if (lapic_detected_unstable && !lapic_marked_unstable) {
				int i;
				/* LAPIC could halt in idle, so notify users */
				for_each_online_cpu(i)
					clockevents_notify(
						CLOCK_EVT_NOTIFY_BROADCAST_ON,
						&i);
				lapic_marked_unstable = 1;
			}
			local_irq_disable();
			cpu = smp_processor_id();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
			stop_critical_timings();

			__monitor((void *)&current_thread_info()->flags, 0, 0);
			smp_mb();
			if (!need_resched())
				__mwait(power_saving_mwait_eax, 1);

			start_critical_timings();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
			local_irq_enable();

			if (jiffies > expire_time) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * current sched_rt has threshold for rt task running time.
		 * When a rt task uses 95% CPU time, the rt thread will be
		 * scheduled out for 5% CPU time to not starve other tasks. But
		 * the mechanism only works when all CPUs have RT task running,
		 * as if one CPU hasn't RT task, RT task from other CPUs will
		 * borrow CPU time from this CPU and cause RT task use > 95%
		 * CPU time. To make 'avoid starvation' work, takes a nap here.
		 */
		if (do_sleep)
			schedule_timeout_killable(HZ * idle_pct / 100);
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;
static int create_power_saving_task(void)
{
	int rc = -ENOMEM;

	ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
			(void *)(unsigned long)ps_tsk_num,
			"acpi_pad/%d", ps_tsk_num);
	rc = PTR_RET(ps_tsks[ps_tsk_num]);
	if (!rc)
		ps_tsk_num++;
	else
		ps_tsks[ps_tsk_num] = NULL;

	return rc;
}
/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void __cpuinit check_tsc_sync_source(int cpu)
{
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized:
	 */
	if (unsynchronized_tsc())
		return;

	if (tsc_clocksource_reliable) {
		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
			pr_info(
			"Skipped synchronization checks as TSC is reliable.\n");
		return;
	}

	/*
	 * Reset it - in case this is a second bootup:
	 */
	atomic_set(&stop_count, 0);

	/*
	 * Wait for the target to arrive:
	 */
	while (atomic_read(&start_count) != cpus-1)
		cpu_relax();
	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp(loop_timeout(cpu));

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	if (nr_warps) {
		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
			smp_processor_id(), cpu);
		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
			   "turning off TSC clock.\n", max_warp);
		mark_tsc_unstable("check_tsc_sync_source failed");
	} else {
		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
			smp_processor_id(), cpu);
	}

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);
	nr_warps = 0;
	max_warp = 0;
	last_tsc = 0;

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);
}