static int db8500_cpufreq_cooling_probe(struct platform_device *pdev) { struct thermal_cooling_device *cdev; struct cpumask mask_val; /* make sure cpufreq driver has been initialized */ if (!cpufreq_frequency_get_table(0)) return -EPROBE_DEFER; cpumask_set_cpu(0, &mask_val); cdev = cpufreq_cooling_register(&mask_val); if (IS_ERR(cdev)) { dev_err(&pdev->dev, "Failed to register cooling device\n"); return PTR_ERR(cdev); } platform_set_drvdata(pdev, cdev); dev_info(&pdev->dev, "Cooling device registered: %s\n", cdev->type); return 0; }
/* * cpudl_find - find the best (later-dl) CPU in the system * @cp: the cpudl max-heap context * @p: the task * @later_mask: a mask to fill in with the selected CPUs (or NULL) * * Returns: int - best CPU (heap maximum if suitable) */ int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask) { int best_cpu = -1; const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) && cpumask_and(later_mask, later_mask, cpu_active_mask)) { best_cpu = cpumask_any(later_mask); goto out; } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && dl_time_before(dl_se->deadline, cp->elements[0].dl)) { best_cpu = cpudl_maximum(cp); if (later_mask) cpumask_set_cpu(best_cpu, later_mask); } out: WARN_ON(!cpu_present(best_cpu) && best_cpu != -1); return best_cpu; }
int __cpuinit __cpu_up(unsigned int cpu) { extern int __cpuinit smp4m_boot_one_cpu(int); extern int __cpuinit smp4d_boot_one_cpu(int); int ret=0; switch(sparc_cpu_model) { case sun4m: ret = smp4m_boot_one_cpu(cpu); break; case sun4d: ret = smp4d_boot_one_cpu(cpu); break; case sparc_leon: ret = leon_boot_one_cpu(cpu); break; case sun4e: printk("SUN4E\n"); BUG(); break; case sun4u: printk("SUN4U\n"); BUG(); break; default: printk("UNKNOWN!\n"); BUG(); break; } if (!ret) { cpumask_set_cpu(cpu, &smp_commenced_mask); while (!cpu_online(cpu)) mb(); } return ret; }
/* Register with the in-kernel thermal management */ static int amlogic_register_thermal(struct amlogic_thermal_platform_data *pdata) { int ret=0; struct cpumask mask_val; memset(&mask_val,0,sizeof(struct cpumask)); cpumask_set_cpu(0, &mask_val); pdata->cpu_cool_dev= cpufreq_cooling_register(&mask_val); if (IS_ERR(pdata->cpu_cool_dev)) { pr_err("Failed to register cpufreq cooling device\n"); ret = -EINVAL; goto err_unregister; } pdata->cpucore_cool_dev = cpucore_cooling_register(); if (IS_ERR(pdata->cpucore_cool_dev)) { pr_err("Failed to register cpufreq cooling device\n"); ret = -EINVAL; goto err_unregister; } pdata->therm_dev = thermal_zone_device_register(pdata->name, pdata->temp_trip_count, 7, pdata, &amlogic_dev_ops, NULL, 0, pdata->idle_interval); if (IS_ERR(pdata->therm_dev)) { pr_err("Failed to register thermal zone device\n"); ret = -EINVAL; goto err_unregister; } pr_info("amlogic: Kernel Thermal management registered\n"); return 0; err_unregister: amlogic_unregister_thermal(pdata); return ret; }
int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; cpu_maps_update_begin(); first_cpu = cpumask_first(cpu_online_mask); /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); pr_info("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1); trace_suspend_resume(TPS("CPU_OFF"), cpu, false); if (!error) cpumask_set_cpu(cpu, frozen_cpus); else { pr_err("Error taking CPU%d down: %d\n", cpu, error); break; } } if (!error) { BUG_ON(num_online_cpus() > 1); /* Make sure the CPUs won't be enabled by someone else */ cpu_hotplug_disabled = 1; } else { pr_err("Non-boot CPUs are not disabled\n"); } cpu_maps_update_done(); return error; }
void __init prom_init(void) { int *argv, *envp; /* passed as 32 bit ptrs */ struct psb_info *prom_infop; void *reset_vec; #ifdef CONFIG_SMP int i; #endif /* truncate to 32 bit and sign extend all args */ argv = (int *)(long)(int)fw_arg1; envp = (int *)(long)(int)fw_arg2; prom_infop = (struct psb_info *)(long)(int)fw_arg3; nlm_prom_info = *prom_infop; nlm_init_node(); /* Update reset entry point with CPU init code */ reset_vec = (void *)CKSEG1ADDR(RESET_VEC_PHYS); memset(reset_vec, 0, RESET_VEC_SIZE); memcpy(reset_vec, (void *)nlm_reset_entry, (nlm_reset_entry_end - nlm_reset_entry)); nlm_early_serial_setup(); build_arcs_cmdline(argv); prom_add_memory(); #ifdef CONFIG_SMP for (i = 0; i < 32; i++) if (nlm_prom_info.online_cpu_map & (1 << i)) cpumask_set_cpu(i, &nlm_cpumask); nlm_wakeup_secondary_cpus(); register_smp_ops(&nlm_smp_ops); #endif xlr_board_info_setup(); xlr_percpu_fmn_init(); }
/** * percpu_ida_free - free a tag * @pool: pool @tag was allocated from * @tag: a tag previously allocated with percpu_ida_alloc() * * Safe to be called from interrupt context. */ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) { struct percpu_ida_cpu *tags; unsigned long flags; unsigned nr_free; BUG_ON(tag >= pool->nr_tags); tags = raw_cpu_ptr(pool->tag_cpu); spin_lock_irqsave(&tags->lock, flags); tags->freelist[tags->nr_free++] = tag; nr_free = tags->nr_free; if (nr_free == 1) { cpumask_set_cpu(smp_processor_id(), &pool->cpus_have_tags); wake_up(&pool->wait); } spin_unlock_irqrestore(&tags->lock, flags); if (nr_free == pool->percpu_max_size) { spin_lock_irqsave(&pool->lock, flags); spin_lock(&tags->lock); if (tags->nr_free == pool->percpu_max_size) { move_tags(pool->freelist, &pool->nr_free, tags->freelist, &tags->nr_free, pool->percpu_batch_size); wake_up(&pool->wait); } spin_unlock(&tags->lock); spin_unlock_irqrestore(&pool->lock, flags); } }
void crash_ipi_callback(struct pt_regs *regs) { static cpumask_t cpus_state_saved = CPU_MASK_NONE; int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); cpumask_set_cpu(cpu, &cpus_state_saved); } atomic_inc(&cpus_in_crash); smp_mb__after_atomic_inc(); /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. */ while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 1); #ifdef CONFIG_PPC64 kexec_smp_wait(); #else for (;;); /* FIXME */ #endif /* NOTREACHED */ }
void rq_attach_root(struct rq *rq, struct root_domain *rd) { struct root_domain *old_rd = NULL; unsigned long flags; raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { old_rd = rq->rd; if (cpumask_test_cpu(rq->cpu, old_rd->online)) set_rq_offline(rq); cpumask_clear_cpu(rq->cpu, old_rd->span); /* * If we dont want to free the old_rd yet then * set old_rd to NULL to skip the freeing later * in this function: */ if (!atomic_dec_and_test(&old_rd->refcount)) old_rd = NULL; } atomic_inc(&rd->refcount); rq->rd = rd; cpumask_set_cpu(rq->cpu, rd->span); if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) set_rq_online(rq); raw_spin_unlock_irqrestore(&rq->lock, flags); if (old_rd) call_rcu_sched(&old_rd->rcu, free_rootdomain); }
/* * Take a map of online CPUs and the number of available interrupt vectors * and generate an output cpumask suitable for spreading MSI/MSI-X vectors * so that they are distributed as good as possible around the CPUs. If * more vectors than CPUs are available we'll map one to each CPU, * otherwise we map one to the first sibling of each socket. * * If there are more vectors than CPUs we will still only have one bit * set per CPU, but interrupt code will keep on assigning the vectors from * the start of the bitmap until we run out of vectors. */ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) { struct cpumask *affinity_mask; unsigned int max_vecs = *nr_vecs; if (max_vecs == 1) return NULL; affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL); if (!affinity_mask) { *nr_vecs = 1; return NULL; } get_online_cpus(); if (max_vecs >= num_online_cpus()) { cpumask_copy(affinity_mask, cpu_online_mask); *nr_vecs = num_online_cpus(); } else { unsigned int vecs = 0, cpu; for_each_online_cpu(cpu) { if (cpu == get_first_sibling(cpu)) { cpumask_set_cpu(cpu, affinity_mask); vecs++; } if (--max_vecs == 0) break; } *nr_vecs = vecs; } put_online_cpus(); return affinity_mask; }
void noinline bench_outstanding_parallel_cpus(uint32_t loops, int nr_cpus, int outstanding_pages) { const char *desc = "parallel_cpus"; struct time_bench_sync sync; struct time_bench_cpu *cpu_tasks; struct cpumask my_cpumask; int i; /* Allocate records for CPUs */ cpu_tasks = kzalloc(sizeof(*cpu_tasks) * nr_cpus, GFP_KERNEL); /* Reduce number of CPUs to run on */ cpumask_clear(&my_cpumask); for (i = 0; i < nr_cpus ; i++) { cpumask_set_cpu(i, &my_cpumask); } pr_info("Limit to %d parallel CPUs\n", nr_cpus); time_bench_run_concurrent(loops, outstanding_pages, NULL, &my_cpumask, &sync, cpu_tasks, time_alloc_pages_outstanding); time_bench_print_stats_cpumask(desc, cpu_tasks, &my_cpumask); kfree(cpu_tasks); }
int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; cpu_maps_update_begin(); first_cpu = cpumask_first(cpu_online_mask); /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); printk("Disabling non-boot CPUs ... cpu_index=%u first_cpu=%u\n",smp_processor_id(),first_cpu); for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; printk("disable_nonboot_cpus cpu=%u\n",cpu); error = _cpu_down(cpu, 1); if (!error) cpumask_set_cpu(cpu, frozen_cpus); else { printk(KERN_ERR "Error taking CPU%d down: %d\n", cpu, error); break; } } if (!error) { BUG_ON(num_online_cpus() > 1); /* Make sure the CPUs won't be enabled by someone else */ cpu_hotplug_disabled = 1; } else { printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } cpu_maps_update_done(); return error; }
static int __init hyperv_prepare_irq_remapping(void) { struct fwnode_handle *fn; int i; if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || !x2apic_supported()) return -ENODEV; fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); if (!fn) return -ENOMEM; ioapic_ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, IOAPIC_REMAPPING_ENTRY, fn, &hyperv_ir_domain_ops, NULL); irq_domain_free_fwnode(fn); /* * Hyper-V doesn't provide irq remapping function for * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. * Cpu's APIC ID is read from ACPI MADT table and APIC IDs * in the MADT table on Hyper-v are sorted monotonic increasingly. * APIC ID reflects cpu topology. There maybe some APIC ID * gaps when cpu number in a socket is not power of two. Prepare * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu * into ioapic_max_cpumask if its APIC ID is less than 256. */ for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--) if (cpu_physical_id(i) < 256) cpumask_set_cpu(i, &ioapic_max_cpumask); return 0; }
static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int cpu_id) { struct cpuinfo_arm *cpu_info; struct cpumask *cpumask; unsigned long cpuid; int cpu; cpumask = kzalloc(cpumask_size(), GFP_KERNEL); if (!cpumask) return -ENOMEM; for_each_possible_cpu(cpu) { cpu_info = &per_cpu(cpu_data, cpu); cpuid = is_smp() ? cpu_info->cpuid : read_cpuid_id(); /* read cpu id part number */ if ((cpuid & 0xFFF0) == cpu_id) cpumask_set_cpu(cpu, cpumask); } drv->cpumask = cpumask; return 0; }
/* Return a mask of the cpus whose caches currently own these pages. */ static void homecache_mask(struct page *page, int pages, struct cpumask *home_mask) { int i; cpumask_clear(home_mask); for (i = 0; i < pages; ++i) { int home = page_home(&page[i]); if (home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT) { cpumask_copy(home_mask, cpu_possible_mask); return; } #if CHIP_HAS_CBOX_HOME_MAP() if (home == PAGE_HOME_HASH) { cpumask_or(home_mask, home_mask, &hash_for_home_map); continue; } #endif if (home == PAGE_HOME_UNCACHED) continue; BUG_ON(home < 0 || home >= NR_CPUS); cpumask_set_cpu(home, home_mask); } }
static void update_siblings_masks(unsigned int cpuid) { struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; int cpu; if (cpuid_topo->cluster_id == -1) { /* * DT does not contain topology information for this cpu * reset it to default behaviour */ pr_debug("CPU%u: No topology information configured\n", cpuid); cpuid_topo->core_id = 0; cpumask_set_cpu(cpuid, &cpuid_topo->core_sibling); cpumask_set_cpu(cpuid, &cpuid_topo->thread_sibling); return; } /* update core and thread sibling masks */ for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; if (cpuid_topo->cluster_id != cpu_topo->cluster_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); if (cpu != cpuid) cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); if (cpuid_topo->core_id != cpu_topo->core_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); if (cpu != cpuid) cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); } }
static int tegra_cpuidle_register(unsigned int cpu) { struct cpuidle_driver *drv; struct cpuidle_state *state; drv = &per_cpu(cpuidle_drv, cpu); drv->name = driver_name; drv->owner = owner; drv->cpumask = &per_cpu(idle_mask, cpu); cpumask_set_cpu(cpu, drv->cpumask); drv->state_count = 0; state = &drv->states[CPUIDLE_STATE_CLKGATING]; snprintf(state->name, CPUIDLE_NAME_LEN, "clock-gated"); snprintf(state->desc, CPUIDLE_DESC_LEN, "CPU clock gated"); state->exit_latency = 10; state->target_residency = 10; state->power_usage = 600; state->flags = CPUIDLE_FLAG_TIME_VALID; state->enter = tegra_idle_enter_clock_gating; #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED drv->safe_state_index = 0; #endif drv->state_count++; #ifdef CONFIG_PM_SLEEP state = &drv->states[CPUIDLE_STATE_POWERGATING]; snprintf(state->name, CPUIDLE_NAME_LEN, "powered-down"); snprintf(state->desc, CPUIDLE_DESC_LEN, "CPU power gated"); state->exit_latency = tegra_cpu_power_good_time(); state->target_residency = tegra_cpu_power_off_time() + tegra_cpu_power_good_time(); if (state->target_residency < tegra_pd_min_residency) state->target_residency = tegra_pd_min_residency; state->power_usage = 100; state->flags = CPUIDLE_FLAG_TIME_VALID; state->enter = tegra_idle_enter_pd; drv->state_count++; if (cpu == 0) { state = &drv->states[CPUIDLE_STATE_MC_CLK_STOP]; snprintf(state->name, CPUIDLE_NAME_LEN, "mc-clock"); snprintf(state->desc, CPUIDLE_DESC_LEN, "MC clock stop"); state->exit_latency = tegra_cpu_power_good_time() + DRAM_SELF_REFRESH_EXIT_LATENCY; state->target_residency = tegra_cpu_power_off_time() + tegra_cpu_power_good_time() + DRAM_SELF_REFRESH_EXIT_LATENCY; if (state->target_residency < tegra_mc_clk_stop_min_residency()) state->target_residency = tegra_mc_clk_stop_min_residency(); state->power_usage = 0; state->flags = CPUIDLE_FLAG_TIME_VALID; state->enter = tegra_idle_enter_pd; state->disabled = true; drv->state_count++; } #endif if (cpuidle_register(drv, NULL)) { pr_err("CPU%u: failed to register driver\n", cpu); return -EIO; } on_each_cpu_mask(drv->cpumask, tegra_cpuidle_setup_bctimer, (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); return 0; }
/** * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector * @adapter: board private structure to initialize * @v_count: q_vectors allocated on adapter, used for ring interleaving * @v_idx: index of vector in adapter struct * @txr_count: total number of Tx rings to allocate * @txr_idx: index of first Tx ring to allocate * @rxr_count: total number of Rx rings to allocate * @rxr_idx: index of first Rx ring to allocate * * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, int v_count, int v_idx, int txr_count, int txr_idx, int rxr_count, int rxr_idx) { struct ixgbe_q_vector *q_vector; struct ixgbe_ring *ring; int node = -1; int cpu = -1; int ring_count, size; ring_count = txr_count + rxr_count; size = sizeof(struct ixgbe_q_vector) + (sizeof(struct ixgbe_ring) * ring_count); /* customize cpu for Flow Director mapping */ if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { if (cpu_online(v_idx)) { cpu = v_idx; node = cpu_to_node(cpu); } } /* allocate q_vector and rings */ q_vector = kzalloc_node(size, GFP_KERNEL, node); if (!q_vector) q_vector = kzalloc(size, GFP_KERNEL); if (!q_vector) return -ENOMEM; /* setup affinity mask and node */ if (cpu != -1) cpumask_set_cpu(cpu, &q_vector->affinity_mask); q_vector->numa_node = node; #ifdef CONFIG_IXGBE_DCA /* initialize CPU for DCA */ q_vector->cpu = -1; #endif /* initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; q_vector->adapter = adapter; q_vector->v_idx = v_idx; /* initialize work limits */ q_vector->tx.work_limit = adapter->tx_work_limit; /* initialize pointer to rings */ ring = q_vector->ring; /* intialize ITR */ if (txr_count && !rxr_count) { /* tx only vector */ if (adapter->tx_itr_setting == 1) q_vector->itr = IXGBE_10K_ITR; else q_vector->itr = adapter->tx_itr_setting; } else { /* rx or rx/tx vector */ if (adapter->rx_itr_setting == 1) q_vector->itr = IXGBE_20K_ITR; else q_vector->itr = adapter->rx_itr_setting; } while (txr_count) { /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; /* configure backlink on ring */ ring->q_vector = q_vector; /* update q_vector Tx values */ ixgbe_add_ring(ring, &q_vector->tx); /* apply Tx specific ring traits */ ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; /* assign ring to adapter */ adapter->tx_ring[txr_idx] = ring; /* update count and index */ txr_count--; txr_idx += v_count; /* push pointer to next ring */ ring++; } while (rxr_count) { /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; /* configure backlink on ring */ ring->q_vector = q_vector; /* update q_vector Rx values */ ixgbe_add_ring(ring, &q_vector->rx); /* * 82599 errata, UDP frames with a 0 checksum * can be marked as checksum errors. */ if (adapter->hw.mac.type == ixgbe_mac_82599EB) set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state); #ifdef IXGBE_FCOE if (adapter->netdev->features & NETIF_F_FCOE_MTU) { struct ixgbe_ring_feature *f; f = &adapter->ring_feature[RING_F_FCOE]; if ((rxr_idx >= f->offset) && (rxr_idx < f->offset + f->indices)) set_bit(__IXGBE_RX_FCOE, &ring->state); } #endif /* IXGBE_FCOE */ /* apply Rx specific ring traits */ ring->count = adapter->rx_ring_count; ring->queue_index = rxr_idx; /* assign ring to adapter */ adapter->rx_ring[rxr_idx] = ring; /* update count and index */ rxr_count--; rxr_idx += v_count; /* push pointer to next ring */ ring++; } return 0; }
static void cpufreq_interactivex_timer(unsigned long data) { u64 delta_idle; u64 update_time; u64 *cpu_time_in_idle; u64 *cpu_idle_exit_time; struct timer_list *t; u64 now_idle = get_cpu_idle_time_us(data, &update_time); cpu_time_in_idle = &per_cpu(time_in_idle, data); cpu_idle_exit_time = &per_cpu(idle_exit_time, data); if (update_time == *cpu_idle_exit_time) return; delta_idle = cputime64_sub(now_idle, *cpu_time_in_idle); /* Scale up if there were no idle cycles since coming out of idle */ if (delta_idle == 0) { if (policy->cur == policy->max) return; if (nr_running() < 1) return; target_freq = policy->max; cpumask_set_cpu(data, &work_cpumask); queue_work(up_wq, &freq_scale_work); return; } /* * There is a window where if the cpu utlization can go from low to high * between the timer expiring, delta_idle will be > 0 and the cpu will * be 100% busy, preventing idle from running, and this timer from * firing. So setup another timer to fire to check cpu utlization. * Do not setup the timer if there is no scheduled work. */ t = &per_cpu(cpu_timer, data); if (!timer_pending(t) && nr_running() > 0) { *cpu_time_in_idle = get_cpu_idle_time_us( data, cpu_idle_exit_time); mod_timer(t, jiffies + 2); } if (policy->cur == policy->min) return; /* * Do not scale down unless we have been at this frequency for the * minimum sample time. */ if (cputime64_sub(update_time, freq_change_time) < min_sample_time) return; target_freq = policy->min; cpumask_set_cpu(data, &work_cpumask); queue_work(down_wq, &freq_scale_work); }
/** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task * * When the next event is more than a tick into the future, stop the idle tick * Called either from the idle loop or from irq_exit() when an idle period was * just interrupted by an interrupt which did not cause a reschedule. */ void tick_nohz_stop_sched_tick(int inidle) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; struct tick_sched *ts; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; int cpu; local_irq_save(flags); cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); /* * Call to tick_nohz_start_idle stops the last_update_time from being * updated. Thus, it must not be called in the event we are called from * irq_exit() with the prior state different than idle. */ if (!inidle && !ts->inidle) goto end; /* * Set ts->inidle unconditionally. Even if the system did not * switch to NOHZ mode the cpu frequency governers rely on the * update of the idle time accounting in tick_nohz_start_idle(). */ ts->inidle = 1; now = tick_nohz_start_idle(ts); /* * If this cpu is offline and it is the one which updates * jiffies, then give up the assignment and let it be taken by * the cpu which runs the tick timer next. If we don't drop * this here the jiffies might be stale and do_timer() never * invoked. */ if (unlikely(!cpu_online(cpu))) { if (cpu == tick_do_timer_cpu) tick_do_timer_cpu = TICK_DO_TIMER_NONE; } if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) goto end; if (need_resched()) goto end; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; if (ratelimit < 10) { printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", local_softirq_pending()); ratelimit++; } goto end; } ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { seq = read_seqbegin(&xtime_lock); last_update = last_jiffies_update; last_jiffies = jiffies; /* * On SMP we really should only care for the CPU which * has the do_timer duty assigned. All other CPUs can * sleep as long as they want. */ if (cpu == tick_do_timer_cpu || tick_do_timer_cpu == TICK_DO_TIMER_NONE) time_delta = timekeeping_max_deferment(); else time_delta = KTIME_MAX; } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || arch_needs_cpu(cpu)) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { /* Get the next timer wheel timer */ next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; } /* * Do not stop the tick, if we are only one off * or if the cpu is required for rcu */ if (!ts->tick_stopped && delta_jiffies == 1) goto out; /* Schedule the tick, if we are at least one jiffie off */ if ((long)delta_jiffies >= 1) { /* * calculate the expiry time for the next timer wheel * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals * that there is no timer pending or at least extremely * far into the future (12 days for HZ=1000). In this * case we set the expiry to the end of time. */ if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { /* * Calculate the time delta for the next timer event. * If the time delta exceeds the maximum time delta * permitted by the current clocksource then adjust * the time delta accordingly to ensure the * clocksource does not wrap. */ time_delta = min_t(u64, time_delta, tick_period.tv64 * delta_jiffies); expires = ktime_add_ns(last_update, time_delta); } else { expires.tv64 = KTIME_MAX; } /* * If this cpu is the one which updates jiffies, then * give up the assignment and let it be taken by the * cpu which runs the tick timer next, which might be * this cpu as well. If we don't drop this here the * jiffies might be stale and do_timer() never * invoked. */ if (cpu == tick_do_timer_cpu) tick_do_timer_cpu = TICK_DO_TIMER_NONE; if (delta_jiffies > 1) cpumask_set_cpu(cpu, nohz_cpu_mask); /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; /* * nohz_stop_sched_tick can be called several times before * the nohz_restart_sched_tick is called. This happens when * interrupts arrive which do not cause a reschedule. In the * first call we save the current tick time, so we can restart * the scheduler tick in nohz_restart_sched_tick. */ if (!ts->tick_stopped) { if (select_nohz_load_balancer(1)) { /* * sched tick not stopped! */ cpumask_clear_cpu(cpu, nohz_cpu_mask); goto out; } ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; ts->idle_jiffies = last_jiffies; rcu_enter_nohz(); } ts->idle_sleeps++; /* Mark expires */ ts->idle_expires = expires; /* * If the expiration time == KTIME_MAX, then * in this case we simply stop the tick timer. */ if (unlikely(expires.tv64 == KTIME_MAX)) { if (ts->nohz_mode == NOHZ_MODE_HIGHRES) hrtimer_cancel(&ts->sched_timer); goto out; } if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED); /* Check, if the timer was already in the past */ if (hrtimer_active(&ts->sched_timer)) goto out; } else if (!tick_program_event(expires, 0)) goto out; /* * We are past the event already. So we crossed a * jiffie boundary. Update jiffies and raise the * softirq. */ tick_do_update_jiffies64(ktime_get()); cpumask_clear_cpu(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); end: local_irq_restore(flags); }
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) { unsigned int i; unsigned int valid_states = 0; unsigned int cpu = policy->cpu; struct acpi_cpufreq_data *data; unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; #ifdef CONFIG_SMP static int blacklisted; #endif pr_debug("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP if (blacklisted) return blacklisted; blacklisted = acpi_cpufreq_blacklist(c); if (blacklisted) return blacklisted; #endif data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) { result = -ENOMEM; goto err_free; } perf = per_cpu_ptr(acpi_perf_data, cpu); data->acpi_perf_cpu = cpu; policy->driver_data = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; result = acpi_processor_register_performance(perf, cpu); if (result) goto err_free_mask; policy->shared_type = perf->shared_type; /* * Will let policy->cpus know about dependency only when software * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { cpumask_copy(policy->cpus, perf->shared_cpu_map); } cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); if (bios_with_sw_any_bug && !policy_is_shared(policy)) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->cpus, topology_core_cpumask(cpu)); } if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); cpumask_copy(data->freqdomain_cpus, topology_sibling_cpumask(cpu)); policy->shared_type = CPUFREQ_SHARED_TYPE_HW; pr_info_once(PFX "overriding BIOS provided _PSD data\n"); } #endif /* capability check */ if (perf->state_count <= 1) { pr_debug("No P-States\n"); result = -ENODEV; goto err_unreg; } if (perf->control_register.space_id != perf->status_register.space_id) { result = -ENODEV; goto err_unreg; } switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 == 0xf) { pr_debug("AMD K8 systems must use native drivers.\n"); result = -ENODEV; goto err_unreg; } pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; break; case ACPI_ADR_SPACE_FIXED_HARDWARE: pr_debug("HARDWARE addr space\n"); if (check_est_cpu(cpu)) { data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; break; } if (check_amd_hwpstate_cpu(cpu)) { data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; break; } result = -ENODEV; goto err_unreg; default: pr_debug("Unknown addr space %d\n", (u32) (perf->control_register.space_id)); result = -ENODEV; goto err_unreg; } data->freq_table = kzalloc(sizeof(*data->freq_table) * (perf->state_count+1), GFP_KERNEL); if (!data->freq_table) { result = -ENOMEM; goto err_unreg; } /* detect transition latency */ policy->cpuinfo.transition_latency = 0; for (i = 0; i < perf->state_count; i++) { if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000; } /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && policy->cpuinfo.transition_latency > 20 * 1000) { policy->cpuinfo.transition_latency = 20 * 1000; printk_once(KERN_INFO "P-state transition latency capped at 20 uS\n"); } /* table init */ for (i = 0; i < perf->state_count; i++) { if (i > 0 && perf->states[i].core_frequency >= data->freq_table[valid_states-1].frequency / 1000) continue; data->freq_table[valid_states].driver_data = i; data->freq_table[valid_states].frequency = perf->states[i].core_frequency * 1000; valid_states++; } data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; perf->state = 0; result = cpufreq_table_validate_and_show(policy, data->freq_table); if (result) goto err_freqfree; if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: /* * The core will not set policy->cur, because * cpufreq_driver->get is NULL, so we need to set it here. * However, we have to guess it, because the current speed is * unknown and not detectable via IO ports. */ policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); break; case ACPI_ADR_SPACE_FIXED_HARDWARE: acpi_cpufreq_driver.get = get_cur_freq_on_cpu; break; default: break; } /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); pr_debug("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n", (i == perf->state ? '*' : ' '), i, (u32) perf->states[i].core_frequency, (u32) perf->states[i].power, (u32) perf->states[i].transition_latency); /* * the first call to ->target() should result in us actually * writing something to the appropriate registers. */ data->resume = 1; return result; err_freqfree: kfree(data->freq_table); err_unreg: acpi_processor_unregister_performance(cpu); err_free_mask: free_cpumask_var(data->freqdomain_cpus); err_free: kfree(data); policy->driver_data = NULL; return result; }
/* * Report back to the Boot Processor during boot time or to the caller processor * during CPU online. */ static void __cpuinit smp_callin(void) { int cpuid, phys_id; unsigned long timeout; /* * If waken up by an INIT in an 82489DX configuration * we may get here before an INIT-deassert IPI reaches * our local APIC. We have to wait for the IPI or we'll * lock up on an APIC access. * * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI. */ cpuid = smp_processor_id(); if (apic->wait_for_init_deassert && cpuid != 0) apic->wait_for_init_deassert(&init_deasserted); /* * (This works even if the APIC is not enabled.) */ phys_id = read_apic_id(); if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, phys_id, cpuid); } pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); /* * STARTUP IPIs are fragile beasts as they might sometimes * trigger some glue motherboard logic. Complete APIC bus * silence for 1 second, this overestimates the time the * boot CPU is spending to send the up to 2 STARTUP IPIs * by a factor of two. This should be enough. */ /* * Waiting 2s total for startup (udelay is not yet working) */ timeout = jiffies + 2*HZ; while (time_before(jiffies, timeout)) { /* * Has the boot CPU finished it's STARTUP sequence? */ if (cpumask_test_cpu(cpuid, cpu_callout_mask)) break; cpu_relax(); } if (!time_before(jiffies, timeout)) { panic("%s: CPU%d started up but did not get a callout!\n", __func__, cpuid); } /* * the boot CPU has finished the init stage and is spinning * on callin_map until we finish. We are free to set up this * CPU, first the APIC. (this is probably redundant on most * boards) */ pr_debug("CALLIN, before setup_local_APIC()\n"); if (apic->smp_callin_clear_local_apic) apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); /* * Need to setup vector mappings before we enable interrupts. */ setup_vector_irq(smp_processor_id()); /* * Save our processor parameters. Note: this information * is needed for clock calibration. */ smp_store_cpu_info(cpuid); /* * Get our bogomips. * Update loops_per_jiffy in cpu_data. Previous call to * smp_store_cpu_info() stored a value that is close but not as * accurate as the value just calculated. */ calibrate_delay(); cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); /* * This must be done before setting cpu_online_mask * or calling notify_cpu_starting. */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); notify_cpu_starting(cpuid); /* * Allow the master to continue. */ cpumask_set_cpu(cpuid, cpu_callin_mask); }
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; int cpu, ret = 0; ktime_t now; /* * If there is no broadcast device, tell the caller not to go * into deep idle. */ if (!tick_broadcast_device.evtdev) return -EBUSY; dev = this_cpu_ptr(&tick_cpu_device)->evtdev; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { /* * If the current CPU owns the hrtimer broadcast * mechanism, it cannot go deep idle and we do not add * the CPU to the broadcast mask. We don't have to go * through the EXIT path as the local timer is not * shutdown. */ ret = broadcast_needs_cpu(bc, cpu); if (ret) goto out; /* * If the broadcast device is in periodic mode, we * return. */ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { /* If it is a hrtimer based broadcast, return busy */ if (bc->features & CLOCK_EVT_FEAT_HRTIMER) ret = -EBUSY; goto out; } if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be * woken by the IPI right away; we return * busy, so the CPU does not try to go deep * idle. */ if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { ret = -EBUSY; } else if (dev->next_event < bc->next_event) { tick_broadcast_set_event(bc, cpu, dev->next_event); /* * In case of hrtimer broadcasts the * programming might have moved the * timer to this cpu. If yes, remove * us from the broadcast mask and * return busy. */ ret = broadcast_needs_cpu(bc, cpu); if (ret) { cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } } } } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); /* * The cpu which was handling the broadcast * timer marked this cpu in the broadcast * pending mask and fired the broadcast * IPI. So we are going to handle the expired * event anyway via the broadcast IPI * handler. No need to reprogram the timer * with an already expired event. */ if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_pending_mask)) goto out; /* * Bail out if there is no next event. */ if (dev->next_event == KTIME_MAX) goto out; /* * If the pending bit is not set, then we are * either the CPU handling the broadcast * interrupt or we got woken by something else. * * We are not longer in the broadcast mask, so * if the cpu local expiry time is already * reached, we would reprogram the cpu local * timer with an already expired event. * * This can lead to a ping-pong when we return * to idle and therefor rearm the broadcast * timer before the cpu local timer was able * to fire. This happens because the forced * reprogramming makes sure that the event * will happen in the future and depending on * the min_delta setting this might be far * enough out that the ping-pong starts. * * If the cpu local next_event has expired * then we know that the broadcast timer * next_event has expired as well and * broadcast is about to be handled. So we * avoid reprogramming and enforce that the * broadcast handler, which did not run yet, * will invoke the cpu local handler. * * We cannot call the handler directly from * here, because we might be in a NOHZ phase * and we did not go through the irq_enter() * nohz fixups. */ now = ktime_get(); if (dev->next_event <= now) { cpumask_set_cpu(cpu, tick_broadcast_force_mask); goto out; } /* * We got woken by something else. Reprogram * the cpu local timer device. */ tick_program_event(dev->next_event, 1); } } out: raw_spin_unlock(&tick_broadcast_lock); return ret; }
/** * tick_broadcast_control - Enable/disable or force broadcast mode * @mode: The selected broadcast mode * * Called when the system enters a state where affected tick devices * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. */ void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; int cpu, bc_stopped; unsigned long flags; /* Protects also the local clockevent device. */ raw_spin_lock_irqsave(&tick_broadcast_lock, flags); td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; /* * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) goto out; if (!tick_device_is_functional(dev)) goto out; cpu = smp_processor_id(); bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); switch (mode) { case TICK_BROADCAST_FORCE: tick_broadcast_forced = 1; case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { /* * Only shutdown the cpu local device, if: * * - the broadcast device exists * - the broadcast device is not a hrtimer based one * - the broadcast device is in periodic mode to * avoid a hickup during switch to oneshot mode */ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; case TICK_BROADCAST_OFF: if (tick_broadcast_forced) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); } break; } if (bc) { if (cpumask_empty(tick_broadcast_mask)) { if (!bc_stopped) clockevents_shutdown(bc); } else if (bc_stopped) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_broadcast_start_periodic(bc); else tick_broadcast_setup_oneshot(bc); } } out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); }
/* * Handle oneshot mode broadcasting */ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) { struct tick_device *td; ktime_t now, next_event; int cpu, next_cpu = 0; bool bc_local; raw_spin_lock(&tick_broadcast_lock); dev->next_event = KTIME_MAX; next_event = KTIME_MAX; cpumask_clear(tmpmask); now = ktime_get(); /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { /* * Required for !SMP because for_each_cpu() reports * unconditionally CPU0 as set on UP kernels. */ if (!IS_ENABLED(CONFIG_SMP) && cpumask_empty(tick_broadcast_oneshot_mask)) break; td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event <= now) { cpumask_set_cpu(cpu, tmpmask); /* * Mark the remote cpu in the pending mask, so * it can avoid reprogramming the cpu local * timer in tick_broadcast_oneshot_control(). */ cpumask_set_cpu(cpu, tick_broadcast_pending_mask); } else if (td->evtdev->next_event < next_event) { next_event = td->evtdev->next_event; next_cpu = cpu; } } /* * Remove the current cpu from the pending mask. The event is * delivered immediately in tick_do_broadcast() ! */ cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask); /* Take care of enforced broadcast requests */ cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); cpumask_clear(tick_broadcast_force_mask); /* * Sanity check. Catch the case where we try to broadcast to * offline cpus. */ if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) cpumask_and(tmpmask, tmpmask, cpu_online_mask); /* * Wakeup the cpus which have an expired event. */ bc_local = tick_do_broadcast(tmpmask); /* * Two reasons for reprogram: * * - The global event did not expire any CPU local * events. This happens in dyntick mode, as the maximum PIT * delta is quite small. * * - There are pending events on sleeping CPUs which were not * in the event mask */ if (next_event != KTIME_MAX) tick_broadcast_set_event(dev, next_cpu, next_event); raw_spin_unlock(&tick_broadcast_lock); if (bc_local) { td = this_cpu_ptr(&tick_cpu_device); td->evtdev->event_handler(td->evtdev); } }
/* * Check, if the device is disfunctional and a place holder, which * needs to be handled by the broadcast device. */ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Devices might be registered with both periodic and oneshot * mode disabled. This signals, that the device needs to be * operated from the broadcast device and is a placeholder for * the cpu local device. */ if (!tick_device_is_functional(dev)) { dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_broadcast_start_periodic(bc); else tick_broadcast_setup_oneshot(bc); ret = 1; } else { /* * Clear the broadcast bit for this cpu if the * device is not power state affected. */ if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) cpumask_clear_cpu(cpu, tick_broadcast_mask); else tick_device_setup_broadcast_func(dev); /* * Clear the broadcast bit if the CPU is not in * periodic broadcast on state. */ if (!cpumask_test_cpu(cpu, tick_broadcast_on)) cpumask_clear_cpu(cpu, tick_broadcast_mask); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_ONESHOT: /* * If the system is in oneshot mode we can * unconditionally clear the oneshot mask bit, * because the CPU is running and therefore * not in an idle state which causes the power * state affected device to stop. Let the * caller initialize the device. */ tick_broadcast_clear_oneshot(cpu); ret = 0; break; case TICKDEV_MODE_PERIODIC: /* * If the system is in periodic mode, check * whether the broadcast device can be * switched off now. */ if (cpumask_empty(tick_broadcast_mask) && bc) clockevents_shutdown(bc); /* * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt * is delivered by the broadcast device, if * the broadcast device exists and is not * hrtimer based. */ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return ret; }
static void __init local_ipi(void) { cpumask_clear(to_cpumask(nmi_ipi_mask)); cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); test_nmi_ipi(to_cpumask(nmi_ipi_mask)); }
/* * Report back to the Boot Processor during boot time or to the caller processor * during CPU online. */ static void smp_callin(void) { int cpuid, phys_id; /* * If waken up by an INIT in an 82489DX configuration * we may get here before an INIT-deassert IPI reaches * our local APIC. We have to wait for the IPI or we'll * lock up on an APIC access. * * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI. */ cpuid = smp_processor_id(); if (apic->wait_for_init_deassert && cpuid) while (!atomic_read(&init_deasserted)) cpu_relax(); /* * (This works even if the APIC is not enabled.) */ phys_id = read_apic_id(); /* * the boot CPU has finished the init stage and is spinning * on callin_map until we finish. We are free to set up this * CPU, first the APIC. (this is probably redundant on most * boards) */ pr_debug("CALLIN, before setup_local_APIC()\n"); if (apic->smp_callin_clear_local_apic) apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); /* * Need to setup vector mappings before we enable interrupts. */ setup_vector_irq(smp_processor_id()); /* * Save our processor parameters. Note: this information * is needed for clock calibration. */ smp_store_cpu_info(cpuid); /* * Get our bogomips. * Update loops_per_jiffy in cpu_data. Previous call to * smp_store_cpu_info() stored a value that is close but not as * accurate as the value just calculated. */ calibrate_delay(); cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); /* * This must be done before setting cpu_online_mask * or calling notify_cpu_starting. */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); notify_cpu_starting(cpuid); /* * Allow the master to continue. */ cpumask_set_cpu(cpuid, cpu_callin_mask); }
/* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ asmlinkage void __cpuinit secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); aee_rr_rec_hoplug(cpu, 1, 0); printk("CPU%u: Booted secondary processor\n", cpu); /* * All kernel threads share the same mm context; grab a * reference and switch to it. */ atomic_inc(&mm->mm_count); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); aee_rr_rec_hoplug(cpu, 2, 0); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); aee_rr_rec_hoplug(cpu, 3, 0); /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); aee_rr_rec_hoplug(cpu, 4, 0); flush_tlb_all(); aee_rr_rec_hoplug(cpu, 5, 0); preempt_disable(); aee_rr_rec_hoplug(cpu, 6, 0); trace_hardirqs_off(); aee_rr_rec_hoplug(cpu, 7, 0); if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); aee_rr_rec_hoplug(cpu, 8, 0); /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online * before we continue. */ set_cpu_online(cpu, true); aee_rr_rec_hoplug(cpu, 9, 0); complete(&cpu_running); aee_rr_rec_hoplug(cpu, 10, 0); smp_store_cpu_info(cpu); aee_rr_rec_hoplug(cpu, 11, 0); /* * Enable GIC and timers. */ notify_cpu_starting(cpu); aee_rr_rec_hoplug(cpu, 12, 0); local_dbg_enable(); aee_rr_rec_hoplug(cpu, 13, 0); local_irq_enable(); aee_rr_rec_hoplug(cpu, 14, 0); local_fiq_enable(); aee_rr_rec_hoplug(cpu, 15, 0); /* * OK, it's off to the idle thread for us */ cpu_startup_entry(CPUHP_ONLINE); aee_rr_rec_hoplug(cpu, 16, 0); }
/** * percpu_ida_alloc - allocate a tag * @pool: pool to allocate from * @state: task state for prepare_to_wait * * Returns a tag - an integer in the range [0..nr_tags) (passed to * tag_pool_init()), or otherwise -ENOSPC on allocation failure. * * Safe to be called from interrupt context (assuming it isn't passed * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. */ int percpu_ida_alloc(struct percpu_ida *pool, int state) { DEFINE_WAIT(wait); struct percpu_ida_cpu *tags; unsigned long flags; int tag = -ENOSPC; tags = raw_cpu_ptr(pool->tag_cpu); spin_lock_irqsave(&tags->lock, flags); /* Fastpath */ if (likely(tags->nr_free)) { tag = tags->freelist[--tags->nr_free]; spin_unlock_irqrestore(&tags->lock, flags); return tag; } spin_unlock_irqrestore(&tags->lock, flags); while (1) { spin_lock_irqsave(&pool->lock, flags); tags = this_cpu_ptr(pool->tag_cpu); /* * prepare_to_wait() must come before steal_tags(), in case * percpu_ida_free() on another cpu flips a bit in * cpus_have_tags * * global lock held and irqs disabled, don't need percpu lock */ if (state != TASK_RUNNING) prepare_to_wait(&pool->wait, &wait, state); if (!tags->nr_free) alloc_global_tags(pool, tags); if (!tags->nr_free) steal_tags(pool, tags); if (tags->nr_free) { tag = tags->freelist[--tags->nr_free]; if (tags->nr_free) cpumask_set_cpu(smp_processor_id(), &pool->cpus_have_tags); } spin_unlock_irqrestore(&pool->lock, flags); if (tag >= 0 || state == TASK_RUNNING) break; if (signal_pending_state(state, current)) { tag = -ERESTARTSYS; break; } schedule(); } if (state != TASK_RUNNING) finish_wait(&pool->wait, &wait); return tag; }