static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); // wait 20 ticks

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk(KERN_WARNING "WARNING: CPU#%d: NMI "
				"appears to be stuck (%d->%d)!\n",
				cpu, prev_nmi_count[cpu], nmi_count(cpu));
			per_cpu(wd_enabled, cpu) = 0;
			atomic_dec(&nmi_active);
		}
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = lapic_adjust_nmi_hz(1);

	kfree(prev_nmi_count);
	return 0;
}
static int __init check_nmi_watchdog(void)
{
	volatile int endflag = 0;
	unsigned int *prev_nmi_count;
	int cpu;

	if (nmi_watchdog == NMI_NONE)
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((10 * 1000) / nmi_hz); // wait 10 ticks

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			endflag = 1;
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu, prev_nmi_count[cpu], nmi_count(cpu));
			nmi_active = 0;
			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
			kfree(prev_nmi_count);
			return -1;
		}
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
}
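/*
 * Not shown in this excerpt: the nmi_cpu_busy() helper that both variants
 * above hand to smp_call_function(), and the file-scope endflag that the
 * first variant relies on. A minimal sketch consistent with those call
 * sites (the real kernel helper may differ in detail):
 */
static int endflag __initdata = 0;

static __init void nmi_cpu_busy(void *data)
{
	volatile int *flag = data;

	/*
	 * Spin without halting so the performance counter driving the
	 * LAPIC watchdog keeps ticking on every CPU while the boot CPU
	 * measures; the caller sets *flag to 1 to release us.
	 */
	while (*flag == 0)
		mb();
}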
int __init check_watchdog(void)
{
	irq_cpustat_t tmp[1];

	printk(KERN_INFO "Testing Watchdog... ");

	memcpy(tmp, irq_stat, sizeof(tmp));
	local_irq_enable();
	mdelay((10 * 1000) / watchdog_hz); /* wait 10 ticks */
	local_irq_disable();

	if (nmi_count(0) - tmp[0].__nmi_count <= 5) {
		printk(KERN_WARNING
		       "CPU#%d: Watchdog appears to be stuck!\n", 0);
		return -1;
	}

	printk(KERN_INFO "OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	 * something more reasonable; makes a difference in some configs
	 */
	watchdog_hz = 1;

	return 0;
}
int __init check_nmi_watchdog (void)
{
	irq_cpustat_t tmp[NR_CPUS];
	int j, cpu;

	if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic())
		return -1;

	printk(KERN_INFO "testing NMI watchdog ... ");

	memcpy(tmp, irq_stat, sizeof(tmp));
	sti();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	for (j = 0; j < smp_num_cpus; j++) {
		cpu = cpu_logical_map(j);
		if (nmi_count(cpu) - tmp[cpu].__nmi_count <= 5) {
			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
			return -1;
		}
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	return 0;
}
asmlinkage void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep)
{
	/*
	 * Since current-> is always on the stack, and we always switch
	 * the stack NMI-atomically, it's safe to use smp_processor_id().
	 */
	int sum, cpu = smp_processor_id();
	u8 wdt, tmp;

	/* stop the watchdog counter and acknowledge the interrupt; the
	   dummy readbacks make sure the writes have reached the device */
	wdt = WDCTR & ~WDCTR_WDCNE;
	WDCTR = wdt;
	tmp = WDCTR;
	NMICR = NMICR_WDIF;

	nmi_count(cpu)++;
	kstat_this_cpu.irqs[NMIIRQ]++;
	sum = irq_stat[cpu].__irq_count;

	if (last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		watchdog_alert_counter++;
		if (watchdog_alert_counter == 5 * watchdog_hz) {
			spin_lock(&watchdog_print_lock);
			/*
			 * We are in trouble anyway, lets at least try
			 * to get a message out.
			 */
			bust_spinlocks(1);
			printk(KERN_ERR
			       "NMI Watchdog detected LOCKUP on CPU%d,"
			       " pc %08lx, registers:\n",
			       cpu, regs->pc);
			show_registers(regs);
			printk("console shuts up ...\n");
			console_silent();
			spin_unlock(&watchdog_print_lock);
			bust_spinlocks(0);
#ifdef CONFIG_GDBSTUB
			if (gdbstub_busy)
				gdbstub_exception(regs, excep);
			else
				gdbstub_intercept(regs, excep);
#endif
			do_exit(SIGSEGV);
		}
	} else {
		last_irq_sums[cpu] = sum;
		watchdog_alert_counter = 0;
	}

	/* reset the watchdog counter and turn it back on */
	WDCTR = wdt | WDCTR_WDRST;
	tmp = WDCTR;
	WDCTR = wdt | WDCTR_WDCNE;
	tmp = WDCTR;
}
int __init check_nmi_watchdog (void)
{
	unsigned int prev_nmi_count[NR_CPUS];
	int cpu;

	printk(KERN_INFO "testing NMI watchdog ... ");

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	/* FIXME: Only boot CPU is online at this stage.
	   Check CPUs as they come up. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpu_online(cpu))
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
			nmi_active = 0;
			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
			return -1;
		}
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	return 0;
}
int __init check_nmi_watchdog (void)
{
	unsigned int prev_nmi_count[NR_CPUS];
	int j, cpu;

	printk(KERN_INFO "testing NMI watchdog ... ");

	for (j = 0; j < smp_num_cpus; j++) {
		cpu = cpu_logical_map(j);
		prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
	}
	sti();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	for (j = 0; j < smp_num_cpus; j++) {
		cpu = cpu_logical_map(j);
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
			return -1;
		}
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	return 0;
}
int __init check_nmi_watchdog (void)
{
    static unsigned int __initdata prev_nmi_count[NR_CPUS];
    int cpu;
    bool_t ok = 1;

    if ( !nmi_watchdog )
        return 0;

    printk("Testing NMI watchdog on all CPUs:");

    for_each_online_cpu ( cpu )
        prev_nmi_count[cpu] = nmi_count(cpu);

    /* Wait for 10 ticks.  Busy-wait on all CPUs: the LAPIC counter that
     * the NMI watchdog uses only runs while the core's not halted */
    if ( nmi_watchdog == NMI_LOCAL_APIC )
        smp_call_function(wait_for_nmis, NULL, 0);
    wait_for_nmis(NULL);

    for_each_online_cpu ( cpu )
    {
        if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
        {
            printk(" %d", cpu);
            ok = 0;
        }
    }

    printk(" %s\n", ok ? "ok" : "stuck");

    /*
     * Now that we know it works we can reduce NMI frequency to
     * something more reasonable; makes a difference in some configs.
     * There's a limit to how slow we can go because writing the perfctr
     * MSRs only sets the low 32 bits, with the top 8 bits sign-extended
     * from those, so it's not possible to set up a delay larger than
     * 2^31 cycles and smaller than (2^40 - 2^31) cycles.
     * (Intel SDM, section 18.22.2)
     */
    if ( nmi_watchdog == NMI_LOCAL_APIC )
        nmi_hz = max(1ul, cpu_khz >> 20);

    return 0;
}
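/*
 * The wait_for_nmis() helper used above is not part of this excerpt.
 * A minimal sketch consistent with the comment (busy-wait for roughly
 * ten watchdog ticks so the LAPIC perfctr keeps running); the TSC-based
 * timing and the rdtsc()/cpu_relax() helpers are assumptions here, not
 * code from the source:
 */
static void __init wait_for_nmis(void *p)
{
    uint64_t start = rdtsc();
    uint64_t ticks = 10 * 1000ull * cpu_khz / nmi_hz; /* ~10 NMI periods */

    /* Spin rather than halt so the cycle counter keeps advancing. */
    while ( rdtsc() - start < ticks )
        cpu_relax();
}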
/*
 * Display interrupt management information through /proc/interrupts
 */
int show_interrupts(struct seq_file *p, void *v)
{
	int i = *(loff_t *) v, j, cpu;
	struct irqaction *action;
	unsigned long flags;

	switch (i) {
		/* display column title bar naming CPUs */
	case 0:
		seq_printf(p, " ");
		for (j = 0; j < NR_CPUS; j++)
			if (cpu_online(j))
				seq_printf(p, "CPU%d ", j);
		seq_putc(p, '\n');
		break;

		/* display information rows, one per active CPU */
	case 1 ... NR_IRQS - 1:
		spin_lock_irqsave(&irq_desc[i].lock, flags);

		action = irq_desc[i].action;
		if (action) {
			seq_printf(p, "%3d: ", i);
			for_each_present_cpu(cpu)
				seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
			seq_printf(p, " %14s.%u",
				   irq_desc[i].chip->name,
				   (GxICR(i) & GxICR_LEVEL) >> GxICR_LEVEL_SHIFT);
			seq_printf(p, " %s", action->name);

			for (action = action->next; action; action = action->next)
				seq_printf(p, ", %s", action->name);

			seq_putc(p, '\n');
		}

		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
		break;

		/* polish off with NMI and error counters */
	case NR_IRQS:
		seq_printf(p, "NMI: ");
		for (j = 0; j < NR_CPUS; j++)
			if (cpu_online(j))
				seq_printf(p, "%10u ", nmi_count(j));
		seq_putc(p, '\n');

		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
		break;
	}

	return 0;
}
int __init check_nmi_watchdog (void)
{
    static unsigned int __initdata prev_nmi_count[NR_CPUS];
    int cpu;

    if ( !nmi_watchdog )
        return 0;

    printk("Testing NMI watchdog --- ");

    for_each_online_cpu ( cpu )
        prev_nmi_count[cpu] = nmi_count(cpu);

    local_irq_enable();
    mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

    for_each_online_cpu ( cpu )
    {
        if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
            printk("CPU#%d stuck. ", cpu);
        else
            printk("CPU#%d okay. ", cpu);
    }

    printk("\n");

    /*
     * Now that we know it works we can reduce NMI frequency to
     * something more reasonable; makes a difference in some configs.
     * There's a limit to how slow we can go because writing the perfctr
     * MSRs only sets the low 32 bits, with the top 8 bits sign-extended
     * from those, so it's not possible to set up a delay larger than
     * 2^31 cycles and smaller than (2^40 - 2^31) cycles.
     * (Intel SDM, section 18.22.2)
     */
    if ( nmi_watchdog == NMI_LOCAL_APIC )
        nmi_hz = max(1ul, cpu_khz >> 20);

    return 0;
}
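/*
 * Worked example for the nmi_hz clamp above (the 3 GHz figure is
 * illustrative, not from the source): with cpu_khz = 3000000,
 * nmi_hz = max(1ul, 3000000 >> 20) = 2, so the perfctr counts down
 * cpu_khz * 1000 / nmi_hz = 1.5e9 cycles per NMI -- comfortably below
 * the 2^31-cycle ceiling that the sign-extended 32-bit MSR write allows.
 */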
dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_enter();

#ifdef CONFIG_X86_32
	{
		int cpu;

		cpu = smp_processor_id();
		++nmi_count(cpu);
	}
#else
	add_pda(__nmi_count, 1);
#endif

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();
}
static void do_nmi_stats(unsigned char key)
{
    int i;
    struct domain *d;
    struct vcpu *v;

    printk("CPU\tNMI\n");
    for_each_online_cpu ( i )
        printk("%3d\t%3d\n", i, nmi_count(i));

    if ( ((d = dom0) == NULL) || (d->vcpu == NULL) ||
         ((v = d->vcpu[0]) == NULL) )
        return;

    i = v->async_exception_mask & (1 << VCPU_TRAP_NMI);
    if ( v->nmi_pending || i )
        printk("dom0 vcpu0: NMI %s%s\n",
               v->nmi_pending ? "pending " : "",
               i ? "masked " : "");
    else
        printk("dom0 vcpu0: NMI neither pending nor masked\n");
}
asmlinkage void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep)
{
	/*
	 * Since current-> is always on the stack, and we always switch
	 * the stack NMI-atomically, it's safe to use smp_processor_id().
	 */
	int sum, cpu;
	int irq = NMIIRQ;
	u8 wdt, tmp;

	wdt = WDCTR & ~WDCTR_WDCNE;
	WDCTR = wdt;
	tmp = WDCTR;
	NMICR = NMICR_WDIF;

	nmi_count(smp_processor_id())++;
	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));

	for_each_online_cpu(cpu) {
		sum = irq_stat[cpu].__irq_count;

		if ((last_irq_sums[cpu] == sum)
#if defined(CONFIG_GDBSTUB) && defined(CONFIG_SMP)
		    && !(CHK_GDBSTUB_BUSY() ||
			 atomic_read(&cpu_doing_single_step))
#endif
		    ) {
			/*
			 * Ayiee, looks like this CPU is stuck ...
			 * wait a few IRQs (5 seconds) before doing the oops ...
			 */
			watchdog_alert_counter[cpu]++;
			if (watchdog_alert_counter[cpu] == 5 * watchdog_hz) {
				spin_lock(&watchdog_print_lock);
				/*
				 * We are in trouble anyway, lets at least try
				 * to get a message out.
				 */
				bust_spinlocks(1);
				printk(KERN_ERR
				       "NMI Watchdog detected LOCKUP on CPU%d,"
				       " pc %08lx, registers:\n",
				       cpu, regs->pc);
#ifdef CONFIG_SMP
				printk(KERN_ERR
				       "--- Register Dump (CPU%d) ---\n",
				       CPUID);
#endif
				show_registers(regs);
#ifdef CONFIG_SMP
				smp_nmi_call_function(watchdog_dump_register,
						      NULL, 1);
#endif
				printk(KERN_NOTICE "console shuts up ...\n");
				console_silent();
				spin_unlock(&watchdog_print_lock);
				bust_spinlocks(0);
#ifdef CONFIG_GDBSTUB
				if (CHK_GDBSTUB_BUSY_AND_ACTIVE())
					gdbstub_exception(regs, excep);
				else
					gdbstub_intercept(regs, excep);
#endif
				do_exit(SIGSEGV);
			}
		} else {
			last_irq_sums[cpu] = sum;
			watchdog_alert_counter[cpu] = 0;
		}
	}

	WDCTR = wdt | WDCTR_WDRST;
	tmp = WDCTR;
	WDCTR = wdt | WDCTR_WDCNE;
	tmp = WDCTR;
}