void vpmu_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    /*
     * Need to clear last_vcpu in case it points to v.
     * We can check here non-atomically whether it is 'v' since
     * last_vcpu can never become 'v' again at this point.
     * We will test it again in vpmu_clear_last() with interrupts
     * disabled to make sure we don't clear someone else.
     */
    if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v )
        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                         vpmu_clear_last, v, 1);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
    {
        /* Unload VPMU first. This will stop counters. */
        on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
                         vpmu_save_force, v, 1);

        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
    }

    spin_lock(&vpmu_lock);
    if ( !is_hardware_domain(v->domain) )
        vpmu_count--;
    spin_unlock(&vpmu_lock);
}
static void vmx_clear_vmcs(struct vcpu *v)
{
    int cpu = v->arch.hvm_vmx.active_cpu;

    if ( cpu != -1 )
        on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
}
void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
{
    on_selected_cpus(p2m->dirty_cpumask, nestedhvm_flushtlb_ipi,
                     p2m->domain, 1);
    cpumask_clear(p2m->dirty_cpumask);
}
static void drv_write(struct drv_cmd *cmd)
{
    if (cpumask_equal(cmd->mask, cpumask_of(smp_processor_id())))
        do_drv_write((void *)cmd);
    else
        on_selected_cpus(cmd->mask, do_drv_write, cmd, 1);
}
static void get_hw_residencies(uint32_t cpu, struct hw_residencies *hw_res)
{
    if ( smp_processor_id() == cpu )
        do_get_hw_residencies((void *)hw_res);
    else
        on_selected_cpus(cpumask_of(cpu),
                         do_get_hw_residencies, (void *)hw_res, 1);
}
void vpmu_load(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();
    struct vcpu *prev = NULL;

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    /* First time this VCPU is running here */
    if ( vpmu->last_pcpu != pcpu )
    {
        /*
         * Get the context from the last pcpu that we ran on. Note that if
         * another VCPU is running there it must have saved this VCPU's
         * context before starting to run (see below).
         * There should be no race since the remote pcpu will disable
         * interrupts before saving the context.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        {
            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                             vpmu_save_force, (void *)v, 1);
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
        }
    }

    /* Prevent forced context save from remote CPU */
    local_irq_disable();

    prev = per_cpu(last_vcpu, pcpu);

    if ( prev != v && prev )
    {
        vpmu = vcpu_vpmu(prev);

        /* Someone ran here before us */
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
        vpmu_save_force(prev);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

        vpmu = vcpu_vpmu(v);
    }

    local_irq_enable();

    /* Only when the PMU is counting do we load the PMU context immediately. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
        return;

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
    {
        apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        /* Arch code needs to set VPMU_CONTEXT_LOADED */
        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
    }
}
/*
 * Return the measured active (C0) frequency on this CPU since the last call
 * to this function.
 * Input: cpu number
 * Return: Average CPU frequency in terms of max frequency (zero on error)
 *
 * We use the IA32_MPERF and IA32_APERF MSRs to get the measured performance
 * over a period of time, while the CPU is in the C0 state.
 * IA32_MPERF counts at the rate of the max advertised frequency.
 * IA32_APERF counts at the rate of the actual CPU frequency.
 * Only the IA32_APERF/IA32_MPERF ratio is architecturally defined, and
 * no meaning should be associated with the absolute values of these MSRs.
 */
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag)
{
    struct cpufreq_policy *policy;
    struct perf_pair readin, cur, *saved;
    unsigned int perf_percent;
    unsigned int retval;

    if (!cpu_online(cpu))
        return 0;

    policy = per_cpu(cpufreq_cpu_policy, cpu);
    if (!policy || !policy->aperf_mperf)
        return 0;

    switch (flag)
    {
    case GOV_GETAVG:
    {
        saved = &per_cpu(gov_perf_pair, cpu);
        break;
    }
    case USR_GETAVG:
    {
        saved = &per_cpu(usr_perf_pair, cpu);
        break;
    }
    default:
        return 0;
    }

    if (cpu == smp_processor_id()) {
        read_measured_perf_ctrs((void *)&readin);
    } else {
        on_selected_cpus(cpumask_of(cpu), read_measured_perf_ctrs,
                         &readin, 1);
    }

    cur.aperf.whole = readin.aperf.whole - saved->aperf.whole;
    cur.mperf.whole = readin.mperf.whole - saved->mperf.whole;
    saved->aperf.whole = readin.aperf.whole;
    saved->mperf.whole = readin.mperf.whole;

#ifdef __i386__
    /*
     * We don't want to do a 64-bit divide with a 32-bit kernel.
     * Get an approximate value. Return failure in case we cannot get
     * an approximate value.
     */
    if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
        int shift_count;
        uint32_t h;

        h = max_t(uint32_t, cur.aperf.split.hi, cur.mperf.split.hi);
        shift_count = fls(h);

        cur.aperf.whole >>= shift_count;
        cur.mperf.whole >>= shift_count;
    }
static void get_hw_residencies(uint32_t cpu, struct hw_residencies *hw_res)
{
    memset(hw_res, 0, sizeof(*hw_res));

    if ( smp_processor_id() == cpu )
        do_get_hw_residencies(hw_res);
    else
        on_selected_cpus(cpumask_of(cpu), do_get_hw_residencies, hw_res, 1);
}
static int powernow_cpufreq_target(struct cpufreq_policy *policy,
                                   unsigned int target_freq,
                                   unsigned int relation)
{
    struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];
    struct processor_performance *perf;
    unsigned int next_state;        /* Index into freq_table */
    unsigned int next_perf_state;   /* Index into perf table */
    int result;

    if (unlikely(data == NULL ||
                 data->acpi_data == NULL || data->freq_table == NULL)) {
        return -ENODEV;
    }

    perf = data->acpi_data;
    result = cpufreq_frequency_table_target(policy,
                                            data->freq_table,
                                            target_freq,
                                            relation, &next_state);
    if (unlikely(result))
        return result;

    next_perf_state = data->freq_table[next_state].index;
    if (perf->state == next_perf_state) {
        if (unlikely(data->arch_cpu_flags & ARCH_CPU_FLAG_RESUME))
            data->arch_cpu_flags &= ~ARCH_CPU_FLAG_RESUME;
        else
            return 0;
    }

    if (policy->shared_type == CPUFREQ_SHARED_TYPE_HW &&
        likely(policy->cpu == smp_processor_id())) {
        transition_pstate(&next_perf_state);
        cpufreq_statistic_update(policy->cpu, perf->state, next_perf_state);
    } else {
        cpumask_t online_policy_cpus;
        unsigned int cpu;

        cpumask_and(&online_policy_cpus, policy->cpus, &cpu_online_map);

        if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
            unlikely(policy->cpu != smp_processor_id()))
            on_selected_cpus(&online_policy_cpus, transition_pstate,
                             &next_perf_state, 1);
        else
            transition_pstate(&next_perf_state);

        for_each_cpu(cpu, &online_policy_cpus)
            cpufreq_statistic_update(cpu, perf->state, next_perf_state);
    }

    perf->state = next_perf_state;
    policy->cur = data->freq_table[next_state].frequency;

    return 0;
}
static int powernow_cpufreq_update(int cpuid, struct cpufreq_policy *policy)
{
    if (!cpumask_test_cpu(cpuid, &cpu_online_map))
        return -EINVAL;

    on_selected_cpus(cpumask_of(cpuid), update_cpb, policy, 1);

    return 0;
}
static void drv_read(struct drv_cmd *cmd)
{
    cmd->val = 0;

    ASSERT(cpumask_weight(cmd->mask) == 1);

    /* to reduce IPI for the sake of performance */
    if (likely(cpumask_test_cpu(smp_processor_id(), cmd->mask)))
        do_drv_read((void *)cmd);
    else
        on_selected_cpus(cmd->mask, do_drv_read, cmd, 1);
}
void vmx_do_resume(struct vcpu *v)
{
    bool_t debug_state;

    if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() )
    {
        if ( v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) )
            vmx_load_vmcs(v);
    }
    else
    {
        /*
         * For pass-through domain, guest PCI-E device driver may leverage the
         * "Non-Snoop" I/O, and explicitly WBINVD or CLFLUSH to a RAM space.
         * Since migration may occur before WBINVD or CLFLUSH, we need to
         * maintain data consistency either by:
         * 1: flushing cache (wbinvd) when the guest is scheduled out if
         *    there is no wbinvd exit, or
         * 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
         */
        if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) &&
             !cpu_has_wbinvd_exiting )
        {
            int cpu = v->arch.hvm_vmx.active_cpu;

            if ( cpu != -1 )
                on_selected_cpus(cpumask_of_cpu(cpu), wbinvd_ipi, NULL, 1, 1);
        }

        vmx_clear_vmcs(v);
        vmx_load_vmcs(v);
        hvm_migrate_timers(v);
        vmx_set_host_env(v);
    }

    debug_state = v->domain->debugger_attached;
    if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
    {
        unsigned long intercepts = __vmread(EXCEPTION_BITMAP);
        unsigned long mask = (1U << TRAP_debug) | (1U << TRAP_int3);

        v->arch.hvm_vcpu.debug_state_latch = debug_state;
        if ( debug_state )
            intercepts |= mask;
        else
            intercepts &= ~mask;

        __vmwrite(EXCEPTION_BITMAP, intercepts);
    }

    hvm_do_resume(v);
    reset_stack_and_jump(vmx_asm_do_vmentry);
}
/*
 * Return the measured active (C0) frequency on this CPU since the last call
 * to this function.
 * Input: cpu number
 * Return: Average CPU frequency in terms of max frequency (zero on error)
 *
 * We use the IA32_MPERF and IA32_APERF MSRs to get the measured performance
 * over a period of time, while the CPU is in the C0 state.
 * IA32_MPERF counts at the rate of the max advertised frequency.
 * IA32_APERF counts at the rate of the actual CPU frequency.
 * Only the IA32_APERF/IA32_MPERF ratio is architecturally defined, and
 * no meaning should be associated with the absolute values of these MSRs.
 */
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag)
{
    struct cpufreq_policy *policy;
    struct perf_pair readin, cur, *saved;
    unsigned int perf_percent;
    unsigned int retval;

    if (!cpu_online(cpu))
        return 0;

    policy = per_cpu(cpufreq_cpu_policy, cpu);
    if (!policy || !policy->aperf_mperf)
        return 0;

    switch (flag)
    {
    case GOV_GETAVG:
    {
        saved = &per_cpu(gov_perf_pair, cpu);
        break;
    }
    case USR_GETAVG:
    {
        saved = &per_cpu(usr_perf_pair, cpu);
        break;
    }
    default:
        return 0;
    }

    if (cpu == smp_processor_id()) {
        read_measured_perf_ctrs((void *)&readin);
    } else {
        on_selected_cpus(cpumask_of(cpu), read_measured_perf_ctrs,
                         &readin, 1);
    }

    cur.aperf.whole = readin.aperf.whole - saved->aperf.whole;
    cur.mperf.whole = readin.mperf.whole - saved->mperf.whole;
    saved->aperf.whole = readin.aperf.whole;
    saved->mperf.whole = readin.mperf.whole;

    if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
        int shift_count = 7;

        cur.aperf.whole >>= shift_count;
        cur.mperf.whole >>= shift_count;
    }
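Both excerpts of get_measured_perf() stop after the overflow guard; the remainder of the function, which converts the APERF/MPERF deltas into a frequency, is not reproduced above. The following is only a hedged sketch of that final step, assuming the deltas have already been scaled so that the multiplication by 100 cannot overflow, and using a hypothetical helper name (measured_freq) and parameters that are not part of the Xen code:

/*
 * Illustrative sketch only (not the Xen implementation): given the APERF
 * and MPERF deltas accumulated since the last sample, the average running
 * frequency relative to the maximum is their ratio scaled by max_freq.
 * Assumes the caller has already shifted the deltas down, as above, so
 * that aperf_delta * 100 fits in 64 bits.
 */
static unsigned int measured_freq(uint64_t aperf_delta, uint64_t mperf_delta,
                                  unsigned int max_freq)
{
    unsigned int perf_percent;

    if (mperf_delta == 0)
        return 0;                       /* no reference ticks: report error */

    perf_percent = (aperf_delta * 100) / mperf_delta;

    return max_freq * perf_percent / 100;
}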
void vpmu_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    /*
     * Need to clear last_vcpu in case it points to v.
     * We can check here non-atomically whether it is 'v' since
     * last_vcpu can never become 'v' again at this point.
     * We will test it again in vpmu_clear_last() with interrupts
     * disabled to make sure we don't clear someone else.
     */
    if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v )
        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                         vpmu_clear_last, v, 1);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
}
static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
    unsigned int i;
    unsigned int valid_states = 0;
    unsigned int cpu = policy->cpu;
    struct acpi_cpufreq_data *data;
    unsigned int result = 0;
    struct processor_performance *perf;
    u32 max_hw_pstate;
    uint64_t msr_content;
    struct cpuinfo_x86 *c = &cpu_data[policy->cpu];

    data = xzalloc(struct acpi_cpufreq_data);
    if (!data)
        return -ENOMEM;

    cpufreq_drv_data[cpu] = data;

    data->acpi_data = &processor_pminfo[cpu]->perf;

    perf = data->acpi_data;
    policy->shared_type = perf->shared_type;

    if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
        policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
        cpumask_set_cpu(cpu, policy->cpus);
        if (cpumask_weight(policy->cpus) != 1) {
            printk(XENLOG_WARNING "Unsupported sharing type %d (%u CPUs)\n",
                   policy->shared_type, cpumask_weight(policy->cpus));
            result = -ENODEV;
            goto err_unreg;
        }
    } else {
        cpumask_copy(policy->cpus, cpumask_of(cpu));
    }

    /* capability check */
    if (perf->state_count <= 1) {
        printk("No P-States\n");
        result = -ENODEV;
        goto err_unreg;
    }
    rdmsrl(MSR_PSTATE_CUR_LIMIT, msr_content);
    max_hw_pstate = (msr_content & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;

    if (perf->control_register.space_id != perf->status_register.space_id) {
        result = -ENODEV;
        goto err_unreg;
    }

    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
                                     (perf->state_count+1));
    if (!data->freq_table) {
        result = -ENOMEM;
        goto err_unreg;
    }

    /* detect transition latency */
    policy->cpuinfo.transition_latency = 0;
    for (i = 0; i < perf->state_count; i++) {
        if ((perf->states[i].transition_latency * 1000) >
            policy->cpuinfo.transition_latency)
            policy->cpuinfo.transition_latency =
                perf->states[i].transition_latency * 1000;
    }

    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;

    /* table init */
    for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
        if (i > 0 && perf->states[i].core_frequency >=
            data->freq_table[valid_states-1].frequency / 1000)
            continue;

        data->freq_table[valid_states].index =
            perf->states[i].control & HW_PSTATE_MASK;
        data->freq_table[valid_states].frequency =
            perf->states[i].core_frequency * 1000;
        valid_states++;
    }
    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
    perf->state = 0;

    result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
    if (result)
        goto err_freqfree;

    if (c->cpuid_level >= 6)
        on_selected_cpus(cpumask_of(cpu), feature_detect, policy, 1);

    /*
     * the first call to ->target() should result in us actually
     * writing something to the appropriate registers.
     */
    data->arch_cpu_flags |= ARCH_CPU_FLAG_RESUME;

    policy->cur = data->freq_table[i].frequency;
    return result;

err_freqfree:
    xfree(data->freq_table);
err_unreg:
    xfree(data);
    cpufreq_drv_data[cpu] = NULL;

    return result;
}
static void drv_write(struct drv_cmd *cmd)
{
    on_selected_cpus(cmd->mask, do_drv_write, (void *)cmd, 0, 0);
}
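Taken together, these call sites share one recurring pattern: perform the work directly when the target CPU is the one we are already running on, otherwise send a synchronous IPI via on_selected_cpus() and wait for the handler to complete. A minimal sketch of that pattern, assuming a hypothetical worker do_work() and the four-argument on_selected_cpus(mask, func, info, wait) form used by most of the excerpts above (illustrative only, not taken from any of the files quoted here):

/*
 * Sketch of the common "run locally or IPI and wait" pattern.
 * do_work() and run_on_cpu() are hypothetical names for illustration.
 */
static void do_work(void *info)
{
    /* Runs on the target CPU, possibly from IPI context. */
}

static void run_on_cpu(unsigned int cpu, void *info)
{
    if ( cpu == smp_processor_id() )
        do_work(info);                  /* avoid a self-IPI */
    else
        on_selected_cpus(cpumask_of(cpu), do_work, info, 1 /* wait */);
}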