void irq_move_masked_irq(struct irq_data *idata) { struct irq_desc *desc = irq_data_to_desc(idata); struct irq_chip *chip = desc->irq_data.chip; if (likely(!irqd_is_setaffinity_pending(&desc->irq_data))) return; irqd_clr_move_pending(&desc->irq_data); /* * Paranoia: cpu-local interrupts shouldn't be calling in here anyway. */ if (irqd_is_per_cpu(&desc->irq_data)) { WARN_ON(1); return; } if (unlikely(cpumask_empty(desc->pending_mask))) return; if (!chip->irq_set_affinity) return; assert_raw_spin_locked(&desc->lock); /* * If there was a valid mask to work with, please * do the disable, re-program, enable sequence. * This is *not* particularly important for level triggered * but in a edge trigger case, we might be setting rte * when an active trigger is coming in. This could * cause some ioapics to mal-function. * Being paranoid i guess! * * For correct operation this depends on the caller * masking the irqs. */ if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) irq_do_set_affinity(&desc->irq_data, desc->pending_mask, false); cpumask_clear(desc->pending_mask); }
/* * Remove a CPU from broadcasting */ void tick_shutdown_broadcast(unsigned int *cpup) { struct clock_event_device *bc; unsigned long flags; unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, tick_broadcast_mask); cpumask_clear_cpu(cpu, tick_broadcast_on); if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { if (bc && cpumask_empty(tick_broadcast_mask)) clockevents_shutdown(bc); } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); }
void _percpu_write_lock(percpu_rwlock_t **per_cpudata, percpu_rwlock_t *percpu_rwlock) { unsigned int cpu; cpumask_t *rwlock_readers = &this_cpu(percpu_rwlock_readers); /* Validate the correct per_cpudata variable has been provided. */ _percpu_rwlock_owner_check(per_cpudata, percpu_rwlock); /* * First take the write lock to protect against other writers or slow * path readers. */ write_lock(&percpu_rwlock->rwlock); /* Now set the global variable so that readers start using read_lock. */ percpu_rwlock->writer_activating = 1; smp_mb(); /* Using a per cpu cpumask is only safe if there is no nesting. */ ASSERT(!in_irq()); cpumask_copy(rwlock_readers, &cpu_online_map); /* Check if there are any percpu readers in progress on this rwlock. */ for ( ; ; ) { for_each_cpu(cpu, rwlock_readers) { /* * Remove any percpu readers not contending on this rwlock * from our check mask. */ if ( per_cpu_ptr(per_cpudata, cpu) != percpu_rwlock ) __cpumask_clear_cpu(cpu, rwlock_readers); } /* Check if we've cleared all percpu readers from check mask. */ if ( cpumask_empty(rwlock_readers) ) break; /* Give the coherency fabric a break. */ cpu_relax(); }; }
void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags) { ASSERT(local_irq_is_enabled()); if ( cpumask_test_cpu(smp_processor_id(), mask) ) flush_area_local(va, flags); if ( !cpumask_subset(mask, cpumask_of(smp_processor_id())) ) { spin_lock(&flush_lock); cpumask_and(&flush_cpumask, mask, &cpu_online_map); cpumask_clear_cpu(smp_processor_id(), &flush_cpumask); flush_va = va; flush_flags = flags; send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR); while ( !cpumask_empty(&flush_cpumask) ) cpu_relax(); spin_unlock(&flush_lock); } }
/* * Broadcast the event to the cpus, which are set in the mask (mangled). */ static bool tick_do_broadcast(struct cpumask *mask) { int cpu = smp_processor_id(); struct tick_device *td; bool local = false; /* * Check, if the current cpu is in the mask */ if (cpumask_test_cpu(cpu, mask)) { struct clock_event_device *bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, mask); /* * We only run the local handler, if the broadcast * device is not hrtimer based. Otherwise we run into * a hrtimer recursion. * * local timer_interrupt() * local_handler() * expire_hrtimers() * bc_handler() * local_handler() * expire_hrtimers() */ local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); } if (!cpumask_empty(mask)) { /* * It might be necessary to actually check whether the devices * have different broadcast functions. For now, just use the * one of the first device. This works as long as we have this * misfeature only on x86 (lapic) */ td = &per_cpu(tick_cpu_device, cpumask_first(mask)); td->evtdev->broadcast(mask); } return local; }
/* On return cpumask will be altered to indicate CPUs changed. * CPUs with states changed will be set in the mask, * CPUs with status unchanged will be unset in the mask. */ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, cpumask_var_t cpus) { int cpu; int cpuret = 0; int ret = 0; if (cpumask_empty(cpus)) return 0; for_each_cpu(cpu, cpus) { switch (state) { case DOWN: cpuret = cpu_down(cpu); break; case UP: cpuret = cpu_up(cpu); break; } if (cpuret) { pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", __func__, ((state == UP) ? "up" : "down"), cpu, cpuret); if (!ret) ret = cpuret; if (state == UP) { /* clear bits for unchanged cpus, return */ cpumask_shift_right(cpus, cpus, cpu); cpumask_shift_left(cpus, cpus, cpu); break; } else { /* clear bit for unchanged cpu, continue */ cpumask_clear_cpu(cpu, cpus); } } } return ret; }
static u32 get_cur_val(const cpumask_t *mask) { struct cpufreq_policy *policy; struct processor_performance *perf; struct drv_cmd cmd; unsigned int cpu = smp_processor_id(); if (unlikely(cpumask_empty(mask))) return 0; if (!cpumask_test_cpu(cpu, mask)) cpu = cpumask_first(mask); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return 0; policy = per_cpu(cpufreq_cpu_policy, cpu); if (!policy || !cpufreq_drv_data[policy->cpu]) return 0; switch (cpufreq_drv_data[policy->cpu]->arch_cpu_flags) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; perf = cpufreq_drv_data[policy->cpu]->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; default: return 0; } cmd.mask = cpumask_of(cpu); drv_read(&cmd); return cmd.val; }
void __ref enable_nonboot_cpus(void) { int cpu, error; /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); cpu_hotplug_disabled = 0; if (cpumask_empty(frozen_cpus)) goto out; printk(KERN_INFO "Enabling non-boot CPUs ...\n"); arch_enable_nonboot_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { error = _cpu_up(cpu, 1); if (!error) { printk(KERN_INFO "CPU%d is up\n", cpu); continue; } printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); }
void __ref enable_nonboot_cpus(void) { int cpu, error; #if defined (CONFIG_MACH_APQ8064_OMEGA) || defined (CONFIG_MACH_APQ8064_OMEGAR) static int first = 0; if (!first) { init_timer(&boost_freq_timer); first = 1; } if (timer_pending(&boost_freq_timer)) del_timer(&boost_freq_timer); boost_freq_timer.function = boost_freq_timer_cb; boost_freq_timer.expires = jiffies + msecs_to_jiffies(BOOST_FREQ_TIME_MS); add_timer(&boost_freq_timer); boost_freq = 1; #endif /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); cpu_hotplug_disabled = 0; if (cpumask_empty(frozen_cpus)) goto out; printk(KERN_INFO "Enabling non-boot CPUs ...\n"); arch_enable_nonboot_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { error = _cpu_up(cpu, 1); if (!error) { printk(KERN_INFO "CPU%d is up\n", cpu); continue; } printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); }
/* This gets called just before system reboots */ void opal_flash_term_callback(void) { struct cpumask mask; if (update_flash_data.status != FLASH_IMG_READY) return; pr_alert("FLASH: Flashing new firmware\n"); pr_alert("FLASH: Image is %u bytes\n", image_data.size); pr_alert("FLASH: Performing flash and reboot/shutdown\n"); pr_alert("FLASH: This will take several minutes. Do not power off!\n"); /* Small delay to help getting the above message out */ msleep(500); /* Return secondary CPUs to firmware */ cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); if (!cpumask_empty(&mask)) smp_call_function_many(&mask, flash_return_cpu, NULL, false); /* Hard disable interrupts */ hard_irq_disable(); }
static int __bind_irq_vector(int irq, int vector, cpumask_t domain) { cpumask_t mask; int cpu; struct irq_cfg *cfg = &irq_cfg[irq]; BUG_ON((unsigned)irq >= NR_IRQS); BUG_ON((unsigned)vector >= IA64_NUM_VECTORS); cpumask_and(&mask, &domain, cpu_online_mask); if (cpumask_empty(&mask)) return -EINVAL; if ((cfg->vector == vector) && cpumask_equal(&cfg->domain, &domain)) return 0; if (cfg->vector != IRQ_VECTOR_UNASSIGNED) return -EBUSY; for_each_cpu(cpu, &mask) per_cpu(vector_irq, cpu)[vector] = irq; cfg->vector = vector; cfg->domain = domain; irq_status[irq] = IRQ_USED; cpumask_or(&vector_table[vector], &vector_table[vector], &domain); return 0; }
void __init arch_get_hmp_domains(struct list_head *hmp_domains_list) { struct hmp_domain *domain; arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask); /* * Initialize hmp_domains * Must be ordered with respect to compute capacity. * Fastest domain at head of list. */ if(!cpumask_empty(&hmp_slow_cpu_mask)) { domain = (struct hmp_domain *) kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask); cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); list_add(&domain->hmp_domains, hmp_domains_list); } domain = (struct hmp_domain *) kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask); cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); list_add(&domain->hmp_domains, hmp_domains_list); }
void __ref enable_nonboot_cpus(void) { int cpu, error; /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); cpu_hotplug_disabled = 0; if (cpumask_empty(frozen_cpus)) goto out; pr_info("Enabling non-boot CPUs ...\n"); arch_enable_nonboot_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { trace_suspend_resume(TPS("CPU_ON"), cpu, true); error = _cpu_up(cpu, 1); trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { pr_info("CPU%d is up\n", cpu); continue; } pr_warn("Error taking CPU%d up: %d\n", cpu, error); }
/* * Handle oneshot mode broadcasting */ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) { struct tick_device *td; ktime_t now, next_event; int cpu, next_cpu = 0; bool bc_local; raw_spin_lock(&tick_broadcast_lock); dev->next_event = KTIME_MAX; next_event = KTIME_MAX; cpumask_clear(tmpmask); now = ktime_get(); /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { /* * Required for !SMP because for_each_cpu() reports * unconditionally CPU0 as set on UP kernels. */ if (!IS_ENABLED(CONFIG_SMP) && cpumask_empty(tick_broadcast_oneshot_mask)) break; td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event <= now) { cpumask_set_cpu(cpu, tmpmask); /* * Mark the remote cpu in the pending mask, so * it can avoid reprogramming the cpu local * timer in tick_broadcast_oneshot_control(). */ cpumask_set_cpu(cpu, tick_broadcast_pending_mask); } else if (td->evtdev->next_event < next_event) { next_event = td->evtdev->next_event; next_cpu = cpu; } } /* * Remove the current cpu from the pending mask. The event is * delivered immediately in tick_do_broadcast() ! */ cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask); /* Take care of enforced broadcast requests */ cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); cpumask_clear(tick_broadcast_force_mask); /* * Sanity check. Catch the case where we try to broadcast to * offline cpus. */ if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) cpumask_and(tmpmask, tmpmask, cpu_online_mask); /* * Wakeup the cpus which have an expired event. */ bc_local = tick_do_broadcast(tmpmask); /* * Two reasons for reprogram: * * - The global event did not expire any CPU local * events. This happens in dyntick mode, as the maximum PIT * delta is quite small. * * - There are pending events on sleeping CPUs which were not * in the event mask */ if (next_event != KTIME_MAX) tick_broadcast_set_event(dev, next_cpu, next_event); raw_spin_unlock(&tick_broadcast_lock); if (bc_local) { td = this_cpu_ptr(&tick_cpu_device); td->evtdev->event_handler(td->evtdev); } }
/** * tick_broadcast_control - Enable/disable or force broadcast mode * @mode: The selected broadcast mode * * Called when the system enters a state where affected tick devices * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. */ void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; int cpu, bc_stopped; unsigned long flags; /* Protects also the local clockevent device. */ raw_spin_lock_irqsave(&tick_broadcast_lock, flags); td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; /* * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) goto out; if (!tick_device_is_functional(dev)) goto out; cpu = smp_processor_id(); bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); switch (mode) { case TICK_BROADCAST_FORCE: tick_broadcast_forced = 1; case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { /* * Only shutdown the cpu local device, if: * * - the broadcast device exists * - the broadcast device is not a hrtimer based one * - the broadcast device is in periodic mode to * avoid a hickup during switch to oneshot mode */ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; case TICK_BROADCAST_OFF: if (tick_broadcast_forced) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); } break; } if (bc) { if (cpumask_empty(tick_broadcast_mask)) { if (!bc_stopped) clockevents_shutdown(bc); } else if (bc_stopped) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_broadcast_start_periodic(bc); else tick_broadcast_setup_oneshot(bc); } } out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); }
/* * Check, if the device is disfunctional and a place holder, which * needs to be handled by the broadcast device. */ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Devices might be registered with both periodic and oneshot * mode disabled. This signals, that the device needs to be * operated from the broadcast device and is a placeholder for * the cpu local device. */ if (!tick_device_is_functional(dev)) { dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_broadcast_start_periodic(bc); else tick_broadcast_setup_oneshot(bc); ret = 1; } else { /* * Clear the broadcast bit for this cpu if the * device is not power state affected. */ if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) cpumask_clear_cpu(cpu, tick_broadcast_mask); else tick_device_setup_broadcast_func(dev); /* * Clear the broadcast bit if the CPU is not in * periodic broadcast on state. */ if (!cpumask_test_cpu(cpu, tick_broadcast_on)) cpumask_clear_cpu(cpu, tick_broadcast_mask); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_ONESHOT: /* * If the system is in oneshot mode we can * unconditionally clear the oneshot mask bit, * because the CPU is running and therefore * not in an idle state which causes the power * state affected device to stop. Let the * caller initialize the device. */ tick_broadcast_clear_oneshot(cpu); ret = 0; break; case TICKDEV_MODE_PERIODIC: /* * If the system is in periodic mode, check * whether the broadcast device can be * switched off now. */ if (cpumask_empty(tick_broadcast_mask) && bc) clockevents_shutdown(bc); /* * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt * is delivered by the broadcast device, if * the broadcast device exists and is not * hrtimer based. */ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return ret; }
/* * This cpu is going to be removed and its vectors migrated to the remaining * online cpus. Check to see if there are enough vectors in the remaining cpus. * This function is protected by stop_machine(). */ int check_irq_vectors_for_cpu_disable(void) { int irq, cpu; unsigned int this_cpu, vector, this_count, count; struct irq_desc *desc; struct irq_data *data; this_cpu = smp_processor_id(); cpumask_copy(&online_new, cpu_online_mask); cpu_clear(this_cpu, online_new); this_count = 0; for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { irq = __this_cpu_read(vector_irq[vector]); if (irq >= 0) { desc = irq_to_desc(irq); data = irq_desc_get_irq_data(desc); cpumask_copy(&affinity_new, data->affinity); cpu_clear(this_cpu, affinity_new); /* Do not count inactive or per-cpu irqs. */ if (!irq_has_action(irq) || irqd_is_per_cpu(data)) continue; /* * A single irq may be mapped to multiple * cpu's vector_irq[] (for example IOAPIC cluster * mode). In this case we have two * possibilities: * * 1) the resulting affinity mask is empty; that is * this the down'd cpu is the last cpu in the irq's * affinity mask, or * * 2) the resulting affinity mask is no longer * a subset of the online cpus but the affinity * mask is not zero; that is the down'd cpu is the * last online cpu in a user set affinity mask. */ if (cpumask_empty(&affinity_new) || !cpumask_subset(&affinity_new, &online_new)) this_count++; } } count = 0; for_each_online_cpu(cpu) { if (cpu == this_cpu) continue; for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] < 0) count++; } } if (count < this_count) { pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", this_cpu, this_count, count); return -ERANGE; } return 0; }
static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask) { struct acpi_processor_performance *perf = to_perf_data(data); struct drv_cmd cmd = { .reg = &perf->control_register, .func.read = data->cpu_freq_read, }; int err; err = smp_call_function_any(mask, do_drv_read, &cmd, 1); WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ return cmd.val; } /* Called via smp_call_function_many(), on the target CPUs */ static void do_drv_write(void *_cmd) { struct drv_cmd *cmd = _cmd; cmd->func.write(cmd->reg, cmd->val); } static void drv_write(struct acpi_cpufreq_data *data, const struct cpumask *mask, u32 val) { struct acpi_processor_performance *perf = to_perf_data(data); struct drv_cmd cmd = { .reg = &perf->control_register, .val = val, .func.write = data->cpu_freq_write, }; int this_cpu; this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) do_drv_write(&cmd); smp_call_function_many(mask, do_drv_write, &cmd, 1); put_cpu(); } static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) { u32 val; if (unlikely(cpumask_empty(mask))) return 0; val = drv_read(data, mask); pr_debug("get_cur_val = %u\n", val); return val; } static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { struct acpi_cpufreq_data *data; struct cpufreq_policy *policy; unsigned int freq; unsigned int cached_freq; pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); policy = cpufreq_cpu_get_raw(cpu); if (unlikely(!policy)) return 0; data = policy->driver_data; if (unlikely(!data || !policy->freq_table)) return 0; cached_freq = policy->freq_table[to_perf_data(data)->state].frequency; freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. * Force set the frequency on next target call. */ data->resume = 1; } pr_debug("cur freq = %u\n", freq); return freq; } static unsigned int check_freqs(struct cpufreq_policy *policy, const struct cpumask *mask, unsigned int freq) { struct acpi_cpufreq_data *data = policy->driver_data; unsigned int cur_freq; unsigned int i; for (i = 0; i < 100; i++) { cur_freq = extract_freq(policy, get_cur_val(mask, data)); if (cur_freq == freq) return 1; udelay(10); } return 0; } static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int index) { struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; const struct cpumask *mask; unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; if (unlikely(!data)) { return -ENODEV; } perf = to_perf_data(data); next_perf_state = policy->freq_table[index].driver_data; if (perf->state == next_perf_state) { if (unlikely(data->resume)) { pr_debug("Called after resume, resetting to P%d\n", next_perf_state); data->resume = 0; } else { pr_debug("Already at target state (P%d)\n", next_perf_state); return 0; } } /* * The core won't allow CPUs to go away until the governor has been * stopped, so we can rely on the stability of policy->cpus. */ mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ? cpumask_of(policy->cpu) : policy->cpus; drv_write(data, mask, perf->states[next_perf_state].control); if (acpi_pstate_strict) { if (!check_freqs(policy, mask, policy->freq_table[index].frequency)) { pr_debug("acpi_cpufreq_target failed (%d)\n", policy->cpu); result = -EAGAIN; } } if (!result) perf->state = next_perf_state; return result; } unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; struct cpufreq_frequency_table *entry; unsigned int next_perf_state, next_freq, index; /* * Find the closest frequency above target_freq. */ if (policy->cached_target_freq == target_freq) index = policy->cached_resolved_idx; else index = cpufreq_table_find_index_dl(policy, target_freq); entry = &policy->freq_table[index]; next_freq = entry->frequency; next_perf_state = entry->driver_data; perf = to_perf_data(data); if (perf->state == next_perf_state) { if (unlikely(data->resume)) data->resume = 0; else return next_freq; } data->cpu_freq_write(&perf->control_register, perf->states[next_perf_state].control); perf->state = next_perf_state; return next_freq; } static unsigned long acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { struct acpi_processor_performance *perf; perf = to_perf_data(data); if (cpu_khz) { /* search the closest match to cpu_khz */ unsigned int i; unsigned long freq; unsigned long freqn = perf->states[0].core_frequency * 1000; for (i = 0; i < (perf->state_count-1); i++) { freq = freqn; freqn = perf->states[i+1].core_frequency * 1000; if ((2 * cpu_khz) > (freqn + freq)) { perf->state = i; return freq; } } perf->state = perf->state_count-1; return freqn; } else { /* assume CPU is at P0... */ perf->state = 0; return perf->states[0].core_frequency * 1000; } } static void free_acpi_perf_data(void) { unsigned int i; /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ for_each_possible_cpu(i) free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) ->shared_cpu_map); free_percpu(acpi_perf_data); }
/* * This maps the physical memory to kernel virtual address space, a total * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET. * * This routine transitions us from using a set of compiled-in large * pages to using some more precise caching, including removing access * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START) * marking read-only data as locally cacheable, striping the remaining * .data and .bss across all the available tiles, and removing access * to pages above the top of RAM (thus ensuring a page fault from a bad * virtual address rather than a hypervisor shoot down for accessing * memory outside the assigned limits). */ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) { unsigned long long irqmask; unsigned long address, pfn; pmd_t *pmd; pte_t *pte; int pte_ofs; const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id()); struct cpumask kstripe_mask; int rc, i; #if CHIP_HAS_CBOX_HOME_MAP() if (ktext_arg_seen && ktext_hash) { pr_warning("warning: \"ktext\" boot argument ignored" " if \"kcache_hash\" sets up text hash-for-home\n"); ktext_small = 0; } if (kdata_arg_seen && kdata_hash) { pr_warning("warning: \"kdata\" boot argument ignored" " if \"kcache_hash\" sets up data hash-for-home\n"); } if (kdata_huge && !hash_default) { pr_warning("warning: disabling \"kdata=huge\"; requires" " kcache_hash=all or =allbutstack\n"); kdata_huge = 0; } #endif /* * Set up a mask for cpus to use for kernel striping. * This is normally all cpus, but minus dataplane cpus if any. * If the dataplane covers the whole chip, we stripe over * the whole chip too. */ cpumask_copy(&kstripe_mask, cpu_possible_mask); if (!kdata_arg_seen) kdata_mask = kstripe_mask; /* Allocate and fill in L2 page tables */ for (i = 0; i < MAX_NUMNODES; ++i) { #ifdef CONFIG_HIGHMEM unsigned long end_pfn = node_lowmem_end_pfn[i]; #else unsigned long end_pfn = node_end_pfn[i]; #endif unsigned long end_huge_pfn = 0; /* Pre-shatter the last huge page to allow per-cpu pages. */ if (kdata_huge) end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT); pfn = node_start_pfn[i]; /* Allocate enough memory to hold L2 page tables for node. */ init_prealloc_ptes(i, end_pfn - pfn); address = (unsigned long) pfn_to_kaddr(pfn); while (pfn < end_pfn) { BUG_ON(address & (HPAGE_SIZE-1)); pmd = get_pmd(pgtables, address); pte = get_prealloc_pte(pfn); if (pfn < end_huge_pfn) { pgprot_t prot = init_pgprot(address); *(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot)); for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; pfn++, pte_ofs++, address += PAGE_SIZE) pte[pte_ofs] = pfn_pte(pfn, prot); } else { if (kdata_huge) printk(KERN_DEBUG "pre-shattered huge" " page at %#lx\n", address); for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; pfn++, pte_ofs++, address += PAGE_SIZE) { pgprot_t prot = init_pgprot(address); pte[pte_ofs] = pfn_pte(pfn, prot); } assign_pte(pmd, pte); } } } /* * Set or check ktext_map now that we have cpu_possible_mask * and kstripe_mask to work with. */ if (ktext_all) cpumask_copy(&ktext_mask, cpu_possible_mask); else if (ktext_nondataplane) ktext_mask = kstripe_mask; else if (!cpumask_empty(&ktext_mask)) { /* Sanity-check any mask that was requested */ struct cpumask bad; cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask); cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask); if (!cpumask_empty(&bad)) { char buf[NR_CPUS * 5]; cpulist_scnprintf(buf, sizeof(buf), &bad); pr_info("ktext: not using unavailable cpus %s\n", buf); } if (cpumask_empty(&ktext_mask)) { pr_warning("ktext: no valid cpus; caching on %d.\n", smp_processor_id()); cpumask_copy(&ktext_mask, cpumask_of(smp_processor_id())); } } address = MEM_SV_INTRPT; pmd = get_pmd(pgtables, address); pfn = 0; /* code starts at PA 0 */ if (ktext_small) { /* Allocate an L2 PTE for the kernel text */ int cpu = 0; pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC, PAGE_HOME_IMMUTABLE); if (ktext_local) { if (ktext_nocache) prot = hv_pte_set_mode(prot, HV_PTE_MODE_UNCACHED); else prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); } else { prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_TILE_L3); cpu = cpumask_first(&ktext_mask); prot = ktext_set_nocache(prot); } BUG_ON(address != (unsigned long)_stext); pte = NULL; for (; address < (unsigned long)_einittext; pfn++, address += PAGE_SIZE) { pte_ofs = pte_index(address); if (pte_ofs == 0) { if (pte) assign_pte(pmd++, pte); pte = alloc_pte(); } if (!ktext_local) { prot = set_remote_cache_cpu(prot, cpu); cpu = cpumask_next(cpu, &ktext_mask); if (cpu == NR_CPUS) cpu = cpumask_first(&ktext_mask); } pte[pte_ofs] = pfn_pte(pfn, prot); } if (pte) assign_pte(pmd, pte); } else { pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); pteval = pte_mkhuge(pteval); #if CHIP_HAS_CBOX_HOME_MAP() if (ktext_hash) { pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_HASH_L3); pteval = ktext_set_nocache(pteval); } else #endif /* CHIP_HAS_CBOX_HOME_MAP() */ if (cpumask_weight(&ktext_mask) == 1) { pteval = set_remote_cache_cpu(pteval, cpumask_first(&ktext_mask)); pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_TILE_L3); pteval = ktext_set_nocache(pteval); } else if (ktext_nocache) pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_UNCACHED); else pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_NO_L3); for (; address < (unsigned long)_einittext; pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE) *(pte_t *)(pmd++) = pfn_pte(pfn, pteval); } /* Set swapper_pgprot here so it is flushed to memory right away. */ swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir); /* * Since we may be changing the caching of the stack and page * table itself, we invoke an assembly helper to do the * following steps: * * - flush the cache so we start with an empty slate * - install pgtables[] as the real page table * - flush the TLB so the new page table takes effect */ irqmask = interrupt_mask_save_mask(); interrupt_mask_set_mask(-1ULL); rc = flush_and_install_context(__pa(pgtables), init_pgprot((unsigned long)pgtables), __get_cpu_var(current_asid), cpumask_bits(my_cpu_mask)); interrupt_mask_restore_mask(irqmask); BUG_ON(rc != 0); /* Copy the page table back to the normal swapper_pg_dir. */ memcpy(pgd_base, pgtables, sizeof(pgtables)); __install_page_table(pgd_base, __get_cpu_var(current_asid), swapper_pgprot); /* * We just read swapper_pgprot and thus brought it into the cache, * with its new home & caching mode. When we start the other CPUs, * they're going to reference swapper_pgprot via their initial fake * VA-is-PA mappings, which cache everything locally. At that * time, if it's in our cache with a conflicting home, the * simulator's coherence checker will complain. So, flush it out * of our cache; we're not going to ever use it again anyway. */ __insn_finv(&swapper_pgprot); }
/* * Powerstate information: The system enters/leaves a state, where * affected devices might stop */ static void tick_do_broadcast_on_off(unsigned long *reason) { struct clock_event_device *bc, *dev; struct tick_device *td; unsigned long flags; int cpu, bc_stopped; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); cpu = smp_processor_id(); td = &per_cpu(tick_cpu_device, cpu); dev = td->evtdev; bc = tick_broadcast_device.evtdev; /* * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) goto out; if (!tick_device_is_functional(dev)) goto out; bc_stopped = cpumask_empty(tick_broadcast_mask); switch (*reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ON: case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) tick_broadcast_force = 1; break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: if (tick_broadcast_force) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (!tick_device_is_functional(dev)) break; if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); } break; } if (cpumask_empty(tick_broadcast_mask)) { if (!bc_stopped) clockevents_shutdown(bc); } else if (bc_stopped) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_broadcast_start_periodic(bc); else tick_broadcast_setup_oneshot(bc); } out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); }
int viridian_hypercall(struct cpu_user_regs *regs) { struct vcpu *curr = current; struct domain *currd = curr->domain; int mode = hvm_guest_x86_mode(curr); unsigned long input_params_gpa, output_params_gpa; uint16_t status = HV_STATUS_SUCCESS; union hypercall_input { uint64_t raw; struct { uint16_t call_code; uint16_t fast:1; uint16_t rsvd1:15; uint16_t rep_count:12; uint16_t rsvd2:4; uint16_t rep_start:12; uint16_t rsvd3:4; }; } input; union hypercall_output { uint64_t raw; struct { uint16_t result; uint16_t rsvd1; uint32_t rep_complete:12; uint32_t rsvd2:20; }; } output = { 0 }; ASSERT(is_viridian_domain(currd)); switch ( mode ) { case 8: input.raw = regs->rcx; input_params_gpa = regs->rdx; output_params_gpa = regs->r8; break; case 4: input.raw = (regs->rdx << 32) | regs->_eax; input_params_gpa = (regs->rbx << 32) | regs->_ecx; output_params_gpa = (regs->rdi << 32) | regs->_esi; break; default: goto out; } switch ( input.call_code ) { case HvNotifyLongSpinWait: /* * See Microsoft Hypervisor Top Level Spec. section 18.5.1. */ perfc_incr(mshv_call_long_wait); do_sched_op(SCHEDOP_yield, guest_handle_from_ptr(NULL, void)); status = HV_STATUS_SUCCESS; break; case HvFlushVirtualAddressSpace: case HvFlushVirtualAddressList: { cpumask_t *pcpu_mask; struct vcpu *v; struct { uint64_t address_space; uint64_t flags; uint64_t vcpu_mask; } input_params; /* * See Microsoft Hypervisor Top Level Spec. sections 12.4.2 * and 12.4.3. */ perfc_incr(mshv_call_flush); /* These hypercalls should never use the fast-call convention. */ status = HV_STATUS_INVALID_PARAMETER; if ( input.fast ) break; /* Get input parameters. */ if ( hvm_copy_from_guest_phys(&input_params, input_params_gpa, sizeof(input_params)) != HVMCOPY_okay ) break; /* * It is not clear from the spec. if we are supposed to * include current virtual CPU in the set or not in this case, * so err on the safe side. */ if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS ) input_params.vcpu_mask = ~0ul; pcpu_mask = &this_cpu(ipi_cpumask); cpumask_clear(pcpu_mask); /* * For each specified virtual CPU flush all ASIDs to invalidate * TLB entries the next time it is scheduled and then, if it * is currently running, add its physical CPU to a mask of * those which need to be interrupted to force a flush. */ for_each_vcpu ( currd, v ) { if ( v->vcpu_id >= (sizeof(input_params.vcpu_mask) * 8) ) break; if ( !(input_params.vcpu_mask & (1ul << v->vcpu_id)) ) continue; hvm_asid_flush_vcpu(v); if ( v != curr && v->is_running ) __cpumask_set_cpu(v->processor, pcpu_mask); } /* * Since ASIDs have now been flushed it just remains to * force any CPUs currently running target vCPUs out of non- * root mode. It's possible that re-scheduling has taken place * so we may unnecessarily IPI some CPUs. */ if ( !cpumask_empty(pcpu_mask) ) smp_send_event_check_mask(pcpu_mask); output.rep_complete = input.rep_count; status = HV_STATUS_SUCCESS; break; } default: status = HV_STATUS_INVALID_HYPERCALL_CODE; break; } out: output.result = status; switch (mode) { case 8: regs->rax = output.raw; break; default: regs->rdx = output.raw >> 32; regs->rax = (uint32_t)output.raw; break; } return HVM_HCALL_completed; }
/* * This cpu is going to be removed and its vectors migrated to the remaining * online cpus. Check to see if there are enough vectors in the remaining cpus. * This function is protected by stop_machine(). */ int check_irq_vectors_for_cpu_disable(void) { unsigned int this_cpu, vector, this_count, count; struct irq_desc *desc; struct irq_data *data; int cpu; this_cpu = smp_processor_id(); cpumask_copy(&online_new, cpu_online_mask); cpumask_clear_cpu(this_cpu, &online_new); this_count = 0; for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { desc = __this_cpu_read(vector_irq[vector]); if (IS_ERR_OR_NULL(desc)) continue; /* * Protect against concurrent action removal, affinity * changes etc. */ raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); cpumask_copy(&affinity_new, irq_data_get_affinity_mask(data)); cpumask_clear_cpu(this_cpu, &affinity_new); /* Do not count inactive or per-cpu irqs. */ if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) { raw_spin_unlock(&desc->lock); continue; } raw_spin_unlock(&desc->lock); /* * A single irq may be mapped to multiple cpu's * vector_irq[] (for example IOAPIC cluster mode). In * this case we have two possibilities: * * 1) the resulting affinity mask is empty; that is * this the down'd cpu is the last cpu in the irq's * affinity mask, or * * 2) the resulting affinity mask is no longer a * subset of the online cpus but the affinity mask is * not zero; that is the down'd cpu is the last online * cpu in a user set affinity mask. */ if (cpumask_empty(&affinity_new) || !cpumask_subset(&affinity_new, &online_new)) this_count++; } /* No need to check any further. */ if (!this_count) return 0; count = 0; for_each_online_cpu(cpu) { if (cpu == this_cpu) continue; /* * We scan from FIRST_EXTERNAL_VECTOR to first system * vector. If the vector is marked in the used vectors * bitmap or an irq is assigned to it, we don't count * it as available. * * As this is an inaccurate snapshot anyway, we can do * this w/o holding vector_lock. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < FIRST_SYSTEM_VECTOR; vector++) { if (!test_bit(vector, used_vectors) && IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { if (++count == this_count) return 0; } } } if (count < this_count) { pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", this_cpu, this_count, count); return -ERANGE; } return 0; }
static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { int cpu, vcpu, gva_n, max_gvas; struct hv_tlb_flush **flush_pcpu; struct hv_tlb_flush *flush; u64 status = U64_MAX; unsigned long flags; trace_hyperv_mmu_flush_tlb_others(cpus, info); if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) return; local_irq_save(flags); flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; if (unlikely(!flush)) { local_irq_restore(flags); goto do_native; } if (info->mm) { /* * AddressSpace argument must match the CR3 with PCID bits * stripped out. */ flush->address_space = virt_to_phys(info->mm->pgd); flush->address_space &= CR3_ADDR_MASK; flush->flags = 0; } else { flush->address_space = 0; flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; } flush->processor_mask = 0; if (cpumask_equal(cpus, cpu_present_mask)) { flush->flags |= HV_FLUSH_ALL_PROCESSORS; } else { /* * From the supplied CPU set we need to figure out if we can get * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} * hypercalls. This is possible when the highest VP number in * the set is < 64. As VP numbers are usually in ascending order * and match Linux CPU ids, here is an optimization: we check * the VP number for the highest bit in the supplied set first * so we can quickly find out if using *_EX hypercalls is a * must. We will also check all VP numbers when walking the * supplied CPU set to remain correct in all cases. */ if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64) goto do_ex_hypercall; for_each_cpu(cpu, cpus) { vcpu = hv_cpu_number_to_vp_number(cpu); if (vcpu == VP_INVAL) { local_irq_restore(flags); goto do_native; } if (vcpu >= 64) goto do_ex_hypercall; __set_bit(vcpu, (unsigned long *) &flush->processor_mask); } }
/* Shared #MC handler. */ void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code, struct mca_banks *bankmask, struct mca_banks *clear_bank) { uint64_t gstatus; mctelem_cookie_t mctc = NULL; struct mca_summary bs; mce_spin_lock(&mce_logout_lock); if (clear_bank != NULL) { memset( clear_bank->bank_map, 0x0, sizeof(long) * BITS_TO_LONGS(clear_bank->num)); } mctc = mcheck_mca_logout(MCA_MCE_SCAN, bankmask, &bs, clear_bank); if (bs.errcnt) { /* * Uncorrected errors must be dealt with in softirq context. */ if (bs.uc || bs.pcc) { add_taint(TAINT_MACHINE_CHECK); if (mctc != NULL) mctelem_defer(mctc); /* * For PCC=1 and can't be recovered, context is lost, so * reboot now without clearing the banks, and deal with * the telemetry after reboot (the MSRs are sticky) */ if (bs.pcc || !bs.recoverable) cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus); } else { if (mctc != NULL) mctelem_commit(mctc); } atomic_set(&found_error, 1); /* The last CPU will be take check/clean-up etc */ atomic_set(&severity_cpu, smp_processor_id()); mce_printk(MCE_CRITICAL, "MCE: clear_bank map %lx on CPU%d\n", *((unsigned long*)clear_bank), smp_processor_id()); if (clear_bank != NULL) mcheck_mca_clearbanks(clear_bank); } else { if (mctc != NULL) mctelem_dismiss(mctc); } mce_spin_unlock(&mce_logout_lock); mce_barrier_enter(&mce_trap_bar); if ( mctc != NULL && mce_urgent_action(regs, mctc)) cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus); mce_barrier_exit(&mce_trap_bar); /* * Wait until everybody has processed the trap. */ mce_barrier_enter(&mce_trap_bar); if (atomic_read(&severity_cpu) == smp_processor_id()) { /* According to SDM, if no error bank found on any cpus, * something unexpected happening, we can't do any * recovery job but to reset the system. */ if (atomic_read(&found_error) == 0) mc_panic("MCE: No CPU found valid MCE, need reset\n"); if (!cpumask_empty(&mce_fatal_cpus)) { char *ebufp, ebuf[96] = "MCE: Fatal error happened on CPUs "; ebufp = ebuf + strlen(ebuf); cpumask_scnprintf(ebufp, 95 - strlen(ebuf), &mce_fatal_cpus); mc_panic(ebuf); } atomic_set(&found_error, 0); } mce_barrier_exit(&mce_trap_bar); /* Clear flags after above fatal check */ mce_barrier_enter(&mce_trap_bar); gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS); if ((gstatus & MCG_STATUS_MCIP) != 0) { mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step"); mca_wrmsr(MSR_IA32_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP); } mce_barrier_exit(&mce_trap_bar); raise_softirq(MACHINE_CHECK_SOFTIRQ); }