/* * This needs a separate iteration over the cpus because we rely on all * topology_sibling_cpumask links to be set-up. */ for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); if ((i == cpu) || (has_mp && match_die(c, o))) { link_mask(topology_core_cpumask, cpu, i); /* * Does this new cpu bringup a new core? */ if (cpumask_weight( topology_sibling_cpumask(cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ if (cpumask_first( topology_sibling_cpumask(i)) == i) c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) cpu_data(i).booted_cores++; } else if (i != cpu && !c->booted_cores) c->booted_cores = cpu_data(i).booted_cores; } if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); }
static int cstate_cpu_init(unsigned int cpu) { unsigned int target; /* * If this is the first online thread of that core, set it in * the core cpu mask as the designated reader. */ target = cpumask_any_and(&cstate_core_cpu_mask, topology_sibling_cpumask(cpu)); if (has_cstate_core && target >= nr_cpu_ids) cpumask_set_cpu(cpu, &cstate_core_cpu_mask); /* * If this is the first online thread of that package, set it * in the package cpu mask as the designated reader. */ target = cpumask_any_and(&cstate_pkg_cpu_mask, topology_core_cpumask(cpu)); if (has_cstate_pkg && target >= nr_cpu_ids) cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); return 0; }
/* * Check if exiting cpu is the designated reader. If so migrate the * events when there is a valid target available */ static int cstate_cpu_exit(unsigned int cpu) { unsigned int target; if (has_cstate_core && cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) { target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); /* Migrate events if there is a valid target */ if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_core_cpu_mask); perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); } } if (has_cstate_pkg && cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { target = cpumask_any_but(topology_core_cpumask(cpu), cpu); /* Migrate events if there is a valid target */ if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_pkg_cpu_mask); perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); } } return 0; }
void set_cpu_sibling_map(int cpu) { bool has_smt = smp_num_siblings > 1; bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); if (!has_mp) { cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); c->booted_cores = 1; return; } for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); if ((i == cpu) || (has_smt && match_smt(c, o))) link_mask(topology_sibling_cpumask, cpu, i); if ((i == cpu) || (has_mp && match_llc(c, o))) link_mask(cpu_llc_shared_mask, cpu, i); }
static int get_first_sibling(unsigned int cpu) { unsigned int ret; ret = cpumask_first(topology_sibling_cpumask(cpu)); if (ret < nr_cpu_ids) return ret; return cpu; }
/* Update distances based on topology */ for_each_cpu(cpu, update_mask) { if (cpu_rmap_copy_neigh(rmap, cpu, topology_sibling_cpumask(cpu), 1)) continue; if (cpu_rmap_copy_neigh(rmap, cpu, topology_core_cpumask(cpu), 2)) continue; if (cpu_rmap_copy_neigh(rmap, cpu, cpumask_of_node(cpu_to_node(cpu)), 3)) continue; /* We could continue into NUMA node distances, but for now * we give up. */ }
static int cstate_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config; int cpu; if (event->attr.type != event->pmu->type) return -ENOENT; /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || event->attr.exclude_guest || event->attr.sample_period) /* no sampling */ return -EINVAL; if (event->cpu < 0) return -EINVAL; if (event->pmu == &cstate_core_pmu) { if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) return -EINVAL; if (!core_msr[cfg].attr) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; cpu = cpumask_any_and(&cstate_core_cpu_mask, topology_sibling_cpumask(event->cpu)); } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; cpu = cpumask_any_and(&cstate_pkg_cpu_mask, topology_core_cpumask(event->cpu)); } else { return -ENOENT; } if (cpu >= nr_cpu_ids) return -ENODEV; event->cpu = cpu; event->hw.config = cfg; event->hw.idx = -1; return 0; }
static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *c = &cpu_data(policy->cpu); int cpuid = 0; unsigned int i; #ifdef CONFIG_SMP cpumask_copy(policy->cpus, topology_sibling_cpumask(policy->cpu)); #endif /* Errata workaround */ cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; switch (cpuid) { case 0x0f07: case 0x0f0a: case 0x0f11: case 0x0f12: has_N44_O17_errata[policy->cpu] = 1; pr_debug("has errata -- disabling low frequencies\n"); } if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && c->x86_model < 2) { /* switch to maximum frequency and measure result */ cpufreq_p4_setdc(policy->cpu, DC_DISABLE); recalibrate_cpu_khz(); } /* get max frequency */ stock_freq = cpufreq_p4_get_frequency(c); if (!stock_freq) return -EINVAL; /* table init */ for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { if ((i < 2) && (has_N44_O17_errata[policy->cpu])) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; else p4clockmod_table[i].frequency = (stock_freq * i)/8; } /* cpuinfo and default policy values */ /* the transition latency is set to be 1 higher than the maximum * transition latency of the ondemand governor */ policy->cpuinfo.transition_latency = 10000001; return cpufreq_table_validate_and_show(policy, &p4clockmod_table[0]); }
/** * sched_set_itmt_core_prio() - Set CPU priority based on ITMT * @prio: Priority of cpu core * @core_cpu: The cpu number associated with the core * * The pstate driver will find out the max boost frequency * and call this function to set a priority proportional * to the max boost frequency. CPU with higher boost * frequency will receive higher priority. * * No need to rebuild sched domain after updating * the CPU priorities. The sched domains have no * dependency on CPU priorities. */ void sched_set_itmt_core_prio(int prio, int core_cpu) { int cpu, i = 1; for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { int smt_prio; /* * Ensure that the siblings are moved to the end * of the priority chain and only used when * all other high priority cpus are out of capacity. */ smt_prio = prio * smp_num_siblings / i; per_cpu(sched_core_priority, cpu) = smt_prio; i++; }
static int power_cpu_init(unsigned int cpu) { int target; /* * 1) If any CPU is set at cpu_mask in the same compute unit, do * nothing. * 2) If no CPU is set at cpu_mask in the same compute unit, * set current ONLINE CPU. * * Note: if there is a CPU aside of the new one already in the * sibling mask, then it is also in cpu_mask. */ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); if (target >= nr_cpumask_bits) cpumask_set_cpu(cpu, &cpu_mask); return 0; }
static int power_cpu_exit(unsigned int cpu) { int target; if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask)) return 0; /* * Find a new CPU on the same compute unit, if was set in cpumask * and still some CPUs on compute unit. Then migrate event and * context to new CPU. */ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); if (target < nr_cpumask_bits) { cpumask_set_cpu(target, &cpu_mask); perf_pmu_migrate_context(&pmu_class, cpu, target); } return 0; }
static int speedstep_cpu_init(struct cpufreq_policy *policy) { unsigned int policy_cpu; struct get_freqs gf; /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP cpumask_copy(policy->cpus, topology_sibling_cpumask(policy->cpu)); #endif policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); /* detect low and high frequency and transition latency */ gf.policy = policy; smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); if (gf.ret) return gf.ret; policy->freq_table = speedstep_freqs; return 0; }
static void round_robin_cpu(unsigned int tsk_index) { struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits); cpumask_var_t tmp; int cpu; unsigned long min_weight = -1; unsigned long uninitialized_var(preferred_cpu); if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) return; mutex_lock(&round_robin_lock); cpumask_clear(tmp); for_each_cpu(cpu, pad_busy_cpus) cpumask_or(tmp, tmp, topology_sibling_cpumask(cpu)); cpumask_andnot(tmp, cpu_online_mask, tmp); /* avoid HT sibilings if possible */ if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { mutex_unlock(&round_robin_lock); return; } for_each_cpu(cpu, tmp) { if (cpu_weight[cpu] < min_weight) { min_weight = cpu_weight[cpu]; preferred_cpu = cpu; } } if (tsk_in_cpu[tsk_index] != -1) cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus); tsk_in_cpu[tsk_index] = preferred_cpu; cpumask_set_cpu(preferred_cpu, pad_busy_cpus); cpu_weight[preferred_cpu]++; mutex_unlock(&round_robin_lock); set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); }
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) { unsigned int i; unsigned int valid_states = 0; unsigned int cpu = policy->cpu; struct acpi_cpufreq_data *data; unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; #ifdef CONFIG_SMP static int blacklisted; #endif pr_debug("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP if (blacklisted) return blacklisted; blacklisted = acpi_cpufreq_blacklist(c); if (blacklisted) return blacklisted; #endif data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) { result = -ENOMEM; goto err_free; } perf = per_cpu_ptr(acpi_perf_data, cpu); data->acpi_perf_cpu = cpu; policy->driver_data = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; result = acpi_processor_register_performance(perf, cpu); if (result) goto err_free_mask; policy->shared_type = perf->shared_type; /* * Will let policy->cpus know about dependency only when software * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { cpumask_copy(policy->cpus, perf->shared_cpu_map); } cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); if (bios_with_sw_any_bug && !policy_is_shared(policy)) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->cpus, topology_core_cpumask(cpu)); } if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); cpumask_copy(data->freqdomain_cpus, topology_sibling_cpumask(cpu)); policy->shared_type = CPUFREQ_SHARED_TYPE_HW; pr_info_once(PFX "overriding BIOS provided _PSD data\n"); } #endif /* capability check */ if (perf->state_count <= 1) { pr_debug("No P-States\n"); result = -ENODEV; goto err_unreg; } if (perf->control_register.space_id != perf->status_register.space_id) { result = -ENODEV; goto err_unreg; } switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 == 0xf) { pr_debug("AMD K8 systems must use native drivers.\n"); result = -ENODEV; goto err_unreg; } pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; break; case ACPI_ADR_SPACE_FIXED_HARDWARE: pr_debug("HARDWARE addr space\n"); if (check_est_cpu(cpu)) { data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; break; } if (check_amd_hwpstate_cpu(cpu)) { data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; break; } result = -ENODEV; goto err_unreg; default: pr_debug("Unknown addr space %d\n", (u32) (perf->control_register.space_id)); result = -ENODEV; goto err_unreg; } data->freq_table = kzalloc(sizeof(*data->freq_table) * (perf->state_count+1), GFP_KERNEL); if (!data->freq_table) { result = -ENOMEM; goto err_unreg; } /* detect transition latency */ policy->cpuinfo.transition_latency = 0; for (i = 0; i < perf->state_count; i++) { if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000; } /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && policy->cpuinfo.transition_latency > 20 * 1000) { policy->cpuinfo.transition_latency = 20 * 1000; printk_once(KERN_INFO "P-state transition latency capped at 20 uS\n"); } /* table init */ for (i = 0; i < perf->state_count; i++) { if (i > 0 && perf->states[i].core_frequency >= data->freq_table[valid_states-1].frequency / 1000) continue; data->freq_table[valid_states].driver_data = i; data->freq_table[valid_states].frequency = perf->states[i].core_frequency * 1000; valid_states++; } data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; perf->state = 0; result = cpufreq_table_validate_and_show(policy, data->freq_table); if (result) goto err_freqfree; if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: /* * The core will not set policy->cur, because * cpufreq_driver->get is NULL, so we need to set it here. * However, we have to guess it, because the current speed is * unknown and not detectable via IO ports. */ policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); break; case ACPI_ADR_SPACE_FIXED_HARDWARE: acpi_cpufreq_driver.get = get_cur_freq_on_cpu; break; default: break; } /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); pr_debug("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n", (i == perf->state ? '*' : ' '), i, (u32) perf->states[i].core_frequency, (u32) perf->states[i].power, (u32) perf->states[i].transition_latency); /* * the first call to ->target() should result in us actually * writing something to the appropriate registers. */ data->resume = 1; return result; err_freqfree: kfree(data->freq_table); err_unreg: acpi_processor_unregister_performance(cpu); err_free_mask: free_cpumask_var(data->freqdomain_cpus); err_free: kfree(data); policy->driver_data = NULL; return result; }