static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
{
	struct node *node_dev = to_node(dev);
	const struct cpumask *mask = cpumask_of_node(node_dev->sysdev.id);
	int len;

	/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));

	len = type ? cpulist_scnprintf(buf, PAGE_SIZE-2, mask) :
		     cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
	buf[len++] = '\n';
	buf[len] = '\0';
	return len;
}
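/*
 * Illustrative sketch, not part of the driver above: the two helpers
 * selected by 'type' render the same mask in different formats, and both
 * return the number of characters written (excluding the trailing NUL),
 * which is why the caller appends the '\n' itself.  The hex bitmap form
 * needs up to 8 hex digits plus a ',' (or the final NUL) per 32-bit word,
 * which is where the "9 chars per 32 bits" sizing in the BUILD_BUG_ON
 * comes from.  The mask contents and width below are assumptions made
 * for the example.
 */
static void cpumap_format_example(const struct cpumask *mask, char *buf)
{
	/* Assume CPUs 0-3 and 8 are set and the mask is 32 bits wide. */
	cpumask_scnprintf(buf, PAGE_SIZE - 2, mask);	/* -> "0000010f" */
	cpulist_scnprintf(buf, PAGE_SIZE - 2, mask);	/* -> "0-3,8"    */
}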
static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
{
	unsigned int irq;
	int status;
	int xics_status[2];
	int irq_server;

	irq = (unsigned int)irq_map[virq].hwirq;
	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
		return -1;

	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
	if (status) {
		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
			__func__, irq, status);
		return -1;
	}

	/*
	 * For the moment only implement delivery to all cpus or one cpu.
	 * Get current irq_server for the given irq
	 */
	irq_server = get_irq_server(virq, *cpumask, 1);
	if (irq_server == -1) {
		char cpulist[128];
		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
		printk(KERN_WARNING
			"%s: No online cpus in the mask %s for irq %d\n",
			__func__, cpulist, virq);
		return -1;
	}

	status = rtas_call(ibm_set_xive, 3, 1, NULL,
			   irq, irq_server, xics_status[1]);
	if (status) {
		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
			__func__, irq, status);
		return -1;
	}

	return 0;
}
static int ics_opal_set_affinity(struct irq_data *d,
				 const struct cpumask *cpumask,
				 bool force)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int16_t server;
	int8_t priority;
	int64_t rc;
	int wanted_server;

	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
		return -1;

	rc = opal_get_xive(hw_irq, &server, &priority);
	if (rc != OPAL_SUCCESS) {
		pr_err("%s: opal_get_xive(irq=%d [hw 0x%x] server=%x)"
		       " error %lld\n",
		       __func__, d->irq, hw_irq, server, rc);
		return -1;
	}

	wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
	if (wanted_server < 0) {
		char cpulist[128];
		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
		pr_warning("%s: No online cpus in the mask %s for irq %d\n",
			   __func__, cpulist, d->irq);
		return -1;
	}
	server = ics_opal_mangle_server(wanted_server);

	pr_devel("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n",
		 d->irq, hw_irq, wanted_server, server);

	rc = opal_set_xive(hw_irq, server, priority);
	if (rc != OPAL_SUCCESS) {
		pr_err("%s: opal_set_xive(irq=%d [hw 0x%x] server=%x)"
		       " error %lld\n",
		       __func__, d->irq, hw_irq, server, rc);
		return -1;
	}
	return 0;
}
static int ics_rtas_set_affinity(struct irq_data *d,
				 const struct cpumask *cpumask,
				 bool force)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int status;
	int xics_status[2];
	int irq_server;

	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
		return -1;

	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
	if (status) {
		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
			__func__, hw_irq, status);
		return -1;
	}

	irq_server = xics_get_irq_server(d->irq, cpumask, 1);
	if (irq_server == -1) {
		char cpulist[128];
		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
		printk(KERN_WARNING
			"%s: No online cpus in the mask %s for irq %d\n",
			__func__, cpulist, d->irq);
		return -1;
	}

	status = rtas_call(ibm_set_xive, 3, 1, NULL,
			   hw_irq, irq_server, xics_status[1]);
	if (status) {
		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
			__func__, hw_irq, status);
		return -1;
	}

	return IRQ_SET_MASK_OK;
}
/*
 * This wrapper function around hv_flush_remote() does several things:
 *
 *  - Provides a return value error-checking panic path, since
 *    there's never any good reason for hv_flush_remote() to fail.
 *  - Accepts a 32-bit PFN rather than a 64-bit PA, which generally
 *    is the type that Linux wants to pass around anyway.
 *  - Centralizes the mark_caches_evicted() handling.
 *  - Canonicalizes that lengths of zero make cpumasks NULL.
 *  - Handles deferring TLB flushes for dataplane tiles.
 *  - Tracks remote interrupts in the per-cpu irq_cpustat_t.
 *
 * Note that we have to wait until the cache flush completes before
 * updating the per-cpu last_cache_flush word, since otherwise another
 * concurrent flush can race, conclude the flush has already
 * completed, and start to use the page while it's still dirty
 * remotely (running concurrently with the actual evict, presumably).
 */
void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
		  const struct cpumask *cache_cpumask_orig,
		  HV_VirtAddr tlb_va, unsigned long tlb_length,
		  unsigned long tlb_pgsize,
		  const struct cpumask *tlb_cpumask_orig,
		  HV_Remote_ASID *asids, int asidcount)
{
	int rc;
	int timestamp = 0; /* happy compiler */
	struct cpumask cache_cpumask_copy, tlb_cpumask_copy;
	struct cpumask *cache_cpumask, *tlb_cpumask;
	HV_PhysAddr cache_pa;
	char cache_buf[NR_CPUS*5], tlb_buf[NR_CPUS*5];

	mb();   /* provided just to simplify "magic hypervisor" mode */

	/*
	 * Canonicalize and copy the cpumasks.
	 */
	if (cache_cpumask_orig && cache_control) {
		cpumask_copy(&cache_cpumask_copy, cache_cpumask_orig);
		cache_cpumask = &cache_cpumask_copy;
	} else {
		cpumask_clear(&cache_cpumask_copy);
		cache_cpumask = NULL;
	}
	if (cache_cpumask == NULL)
		cache_control = 0;
	if (tlb_cpumask_orig && tlb_length) {
		cpumask_copy(&tlb_cpumask_copy, tlb_cpumask_orig);
		tlb_cpumask = &tlb_cpumask_copy;
	} else {
		cpumask_clear(&tlb_cpumask_copy);
		tlb_cpumask = NULL;
	}

	hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length,
			asids, asidcount);
	cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT;
	if (cache_control & HV_FLUSH_EVICT_L2)
		timestamp = mark_caches_evicted_start();
	rc = hv_flush_remote(cache_pa, cache_control,
			     cpumask_bits(cache_cpumask),
			     tlb_va, tlb_length, tlb_pgsize,
			     cpumask_bits(tlb_cpumask),
			     asids, asidcount);
	if (cache_control & HV_FLUSH_EVICT_L2)
		mark_caches_evicted_finish(cache_cpumask, timestamp);
	if (rc == 0)
		return;

	cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
	cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy);

	pr_err("hv_flush_remote(%#llx, %#lx, %p [%s],"
	       " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n",
	       cache_pa, cache_control, cache_cpumask, cache_buf,
	       (unsigned long)tlb_va, tlb_length, tlb_pgsize,
	       tlb_cpumask, tlb_buf,
	       asids, asidcount, rc);
	panic("Unsafe to continue.");
}
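/*
 * Minimal usage sketch, not taken from the file above: a TLB-only flush
 * of a single page on the CPUs currently using an mm, with no cache
 * eviction.  Per the canonicalization described in the header comment, a
 * cache_control of 0 together with a NULL cache mask means "no cache
 * work"; the flushed length and the page size are both one page.  The
 * helper name and the mm-based call site are assumptions made for the
 * example.
 */
static void example_flush_one_page(struct mm_struct *mm, unsigned long va)
{
	flush_remote(0, 0, NULL,			/* no cache flush */
		     (HV_VirtAddr)va, PAGE_SIZE, PAGE_SIZE,
		     mm_cpumask(mm),			/* TLB flush targets */
		     NULL, 0);				/* no remote ASIDs */
}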
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<30) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}

				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(rps_sock_flow_table, sock_table);
			if (sock_table)
				static_key_slow_inc(&rps_needed);
			if (orig_sock_table) {
				static_key_slow_dec(&rps_needed);
				synchronize_rcu();
				vfree(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp,
				 loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse_user(buffer, *lenp, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				synchronize_rcu();
				kfree(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		char kbuf[128];

		if (*ppos || !*lenp) {
			*lenp = 0;
			goto done;
		}

		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		len = min(sizeof(kbuf) - 1, *lenp);
		len = cpumask_scnprintf(kbuf, len, mask);
		if (!len) {
			*lenp = 0;
			goto done;
		}
		if (len < *lenp)
			kbuf[len++] = '\n';
		if (copy_to_user(buffer, kbuf, len)) {
			ret = -EFAULT;
			goto done;
		}
		*lenp = len;
		*ppos += len;
	}

done:
	free_cpumask_var(mask);
	return ret;
}
/* Shared #MC handler. */
void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code,
			struct mca_banks *bankmask, struct mca_banks *clear_bank)
{
	uint64_t gstatus;
	mctelem_cookie_t mctc = NULL;
	struct mca_summary bs;

	mce_spin_lock(&mce_logout_lock);

	if (clear_bank != NULL) {
		memset(clear_bank->bank_map, 0x0,
		       sizeof(long) * BITS_TO_LONGS(clear_bank->num));
	}
	mctc = mcheck_mca_logout(MCA_MCE_SCAN, bankmask, &bs, clear_bank);
	if (bs.errcnt) {
		/*
		 * Uncorrected errors must be dealt with in softirq context.
		 */
		if (bs.uc || bs.pcc) {
			add_taint(TAINT_MACHINE_CHECK);
			if (mctc != NULL)
				mctelem_defer(mctc);
			/*
			 * For PCC=1 or errors that can't be recovered,
			 * context is lost, so reboot now without clearing
			 * the banks, and deal with the telemetry after
			 * reboot (the MSRs are sticky).
			 */
			if (bs.pcc || !bs.recoverable)
				cpumask_set_cpu(smp_processor_id(),
						&mce_fatal_cpus);
		} else {
			if (mctc != NULL)
				mctelem_commit(mctc);
		}
		atomic_set(&found_error, 1);

		/* The last CPU will take care of check/clean-up etc. */
		atomic_set(&severity_cpu, smp_processor_id());

		mce_printk(MCE_CRITICAL, "MCE: clear_bank map %lx on CPU%d\n",
			   *((unsigned long *)clear_bank), smp_processor_id());
		if (clear_bank != NULL)
			mcheck_mca_clearbanks(clear_bank);
	} else {
		if (mctc != NULL)
			mctelem_dismiss(mctc);
	}
	mce_spin_unlock(&mce_logout_lock);

	mce_barrier_enter(&mce_trap_bar);
	if (mctc != NULL && mce_urgent_action(regs, mctc))
		cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus);
	mce_barrier_exit(&mce_trap_bar);

	/*
	 * Wait until everybody has processed the trap.
	 */
	mce_barrier_enter(&mce_trap_bar);
	if (atomic_read(&severity_cpu) == smp_processor_id()) {
		/*
		 * According to the SDM, if no error bank was found on any
		 * cpu, something unexpected is happening; we can't do any
		 * recovery job but have to reset the system.
		 */
		if (atomic_read(&found_error) == 0)
			mc_panic("MCE: No CPU found valid MCE, need reset\n");
		if (!cpumask_empty(&mce_fatal_cpus)) {
			char *ebufp, ebuf[96] = "MCE: Fatal error happened on CPUs ";
			ebufp = ebuf + strlen(ebuf);
			cpumask_scnprintf(ebufp, 95 - strlen(ebuf),
					  &mce_fatal_cpus);
			mc_panic(ebuf);
		}
		atomic_set(&found_error, 0);
	}
	mce_barrier_exit(&mce_trap_bar);

	/* Clear flags after above fatal check */
	mce_barrier_enter(&mce_trap_bar);
	gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
	if ((gstatus & MCG_STATUS_MCIP) != 0) {
		mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step");
		mca_wrmsr(MSR_IA32_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP);
	}
	mce_barrier_exit(&mce_trap_bar);

	raise_softirq(MACHINE_CHECK_SOFTIRQ);
}
static ssize_t acpi_pad_idlecpus_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	return cpumask_scnprintf(buf, PAGE_SIZE,
		to_cpumask(pad_busy_cpus_bits));
}
int show_schedstat(struct seq_file *seq, void *v)
{
	int cpu;
	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
	char *mask_str = kmalloc(mask_len, GFP_KERNEL);

	if (mask_str == NULL)
		return -ENOMEM;

	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
	seq_printf(seq, "timestamp %lu\n", jiffies);
	for_each_online_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
		struct sched_domain *sd;
		int dcount = 0;
#endif

		/* runqueue-specific stats */
		seq_printf(seq,
		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
		    cpu, rq->yld_count,
		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
		    rq->ttwu_count, rq->ttwu_local,
		    rq->rq_cpu_time,
		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);

		seq_printf(seq, "\n");

#ifdef CONFIG_SMP
		/* domain-specific stats */
		rcu_read_lock();
		for_each_domain(cpu, sd) {
			enum cpu_idle_type itype;

			cpumask_scnprintf(mask_str, mask_len,
					  sched_domain_span(sd));
			seq_printf(seq, "domain%d %s", dcount++, mask_str);
			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
					itype++) {
				seq_printf(seq, " %u %u %u %u %u %u %u %u",
				    sd->lb_count[itype],
				    sd->lb_balanced[itype],
				    sd->lb_failed[itype],
				    sd->lb_imbalance[itype],
				    sd->lb_gained[itype],
				    sd->lb_hot_gained[itype],
				    sd->lb_nobusyq[itype],
				    sd->lb_nobusyg[itype]);
			}
			seq_printf(seq,
			    " %u %u %u %u %u %u %u %u %u %u %u %u\n",
			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
			    sd->ttwu_move_balance);
		}
		rcu_read_unlock();
#endif
	}
	kfree(mask_str);
	return 0;
}