static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) { struct pseries_errorlog *pseries_log; struct pseries_hp_errorlog *hp_elog; spin_lock(&ras_log_buf_lock); rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), rtas_get_error_log_max()); pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, PSERIES_ELOG_SECT_ID_HOTPLUG); hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; /* * Since PCI hotplug is not currently supported on pseries, put PCI * hotplug events on the ras_log_buf to be handled by rtas_errd. */ if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) queue_hotplug_event(hp_elog); else log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); spin_unlock(&ras_log_buf_lock); return IRQ_HANDLED; }
/** * Find the data portion of an IO Event section from event log. * @elog: RTAS error/event log. * * Return: * pointer to a valid IO event section data. NULL if not found. */ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) { struct pseries_errorlog *sect; /* We should only ever get called for io-event interrupts, but if * we do get called for another type then something went wrong so * make some noise about it. * RTAS_TYPE_IO only exists in extended event log version 6 or later. * No need to check event log version. */ if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) { printk_once(KERN_WARNING"io_event_irq: Unexpected event type %d", rtas_error_type(elog)); return NULL; } sect = get_pseries_errorlog(elog, PSERIES_ELOG_SECT_ID_IO_EVENT); if (unlikely(!sect)) { printk_once(KERN_WARNING "io_event_irq: RTAS extended event " "log does not contain an IO Event section. " "Could be a bug in system firmware!\n"); return NULL; } return (struct pseries_io_event *) §->data; }
static void rtas_parse_epow_errlog(struct rtas_error_log *log) { struct pseries_errorlog *pseries_log; struct epow_errorlog *epow_log; char action_code; char modifier; pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); if (pseries_log == NULL) return; epow_log = (struct epow_errorlog *)pseries_log->data; action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ switch (action_code) { case EPOW_RESET: if (num_epow_events) { pr_info("Non critical power/cooling issue cleared\n"); num_epow_events--; } break; case EPOW_WARN_COOLING: pr_info("Non-critical cooling issue detected. Check RTAS error" " log for details\n"); break; case EPOW_WARN_POWER: pr_info("Non-critical power issue detected. Check RTAS error" " log for details\n"); break; case EPOW_SYSTEM_SHUTDOWN: handle_system_shutdown(epow_log->event_modifier); break; case EPOW_SYSTEM_HALT: pr_emerg("Critical power/cooling issue detected. Check RTAS" " error log for details. Powering off.\n"); orderly_poweroff(true); break; case EPOW_MAIN_ENCLOSURE: case EPOW_POWER_OFF: pr_emerg("System about to lose power. Check RTAS error log " " for details. Powering off immediately.\n"); emergency_sync(); kernel_power_off(); break; default: pr_err("Unknown power/cooling event (action code = %d)\n", action_code); } /* Increment epow events counter variable */ if (action_code != EPOW_RESET) num_epow_events++; }
void rtas_parse_epow_errlog(struct rtas_error_log *log) { struct pseries_errorlog *pseries_log; struct epow_errorlog *epow_log; char action_code; char modifier; pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); if (pseries_log == NULL) return; epow_log = (struct epow_errorlog *)pseries_log->data; action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ switch (action_code) { case EPOW_RESET: pr_err("Non critical power or cooling issue cleared"); break; case EPOW_WARN_COOLING: pr_err("Non critical cooling issue reported by firmware"); pr_err("Check RTAS error log for details"); break; case EPOW_WARN_POWER: pr_err("Non critical power issue reported by firmware"); pr_err("Check RTAS error log for details"); break; case EPOW_SYSTEM_SHUTDOWN: handle_system_shutdown(epow_log->event_modifier); break; case EPOW_SYSTEM_HALT: pr_emerg("Firmware initiated power off"); orderly_poweroff(1); break; case EPOW_MAIN_ENCLOSURE: case EPOW_POWER_OFF: pr_emerg("Critical power/cooling issue reported by firmware"); pr_emerg("Check RTAS error log for details"); pr_emerg("Immediate power off"); emergency_sync(); kernel_power_off(); break; default: pr_err("Unknown power/cooling event (action code %d)", action_code); } }
static int mce_handle_error(struct rtas_error_log *errp) { struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log; int disposition = rtas_error_disposition(errp); u8 error_type; if (!rtas_error_extended(errp)) goto out; pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); if (pseries_log == NULL) goto out; mce_log = (struct pseries_mc_errorlog *)pseries_log->data; error_type = mce_log->error_type; #ifdef CONFIG_PPC_BOOK3S_64 if (disposition == RTAS_DISP_NOT_RECOVERED) { switch (error_type) { case MC_ERROR_TYPE_SLB: case MC_ERROR_TYPE_ERAT: /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. * There are chances that we may hit MCE again if there * is a parity error on the SLB entry we trying to read * for saving. Hence limit the slb saving to single * level of recursion. */ if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); flush_and_reload_slb(); disposition = RTAS_DISP_FULLY_RECOVERED; rtas_set_disposition_recovered(errp); break; default: break; } } #endif out: return disposition; }
/*
 * Hand a UE machine check to pseries_do_memory_failure().
 *
 * Nothing happens unless the log is extended, contains an MCE section, and
 * that section's error type is MC_ERROR_TYPE_UE.
 */
static void pseries_process_ue(struct pt_regs *regs,
			       struct rtas_error_log *errp)
{
	struct pseries_errorlog *sect;
	struct pseries_mc_errorlog *mc;

	if (rtas_error_extended(errp)) {
		sect = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
		if (sect) {
			mc = (struct pseries_mc_errorlog *)sect->data;
			if (mc->error_type == MC_ERROR_TYPE_UE)
				pseries_do_memory_failure(regs, mc);
		}
	}
}
static void pseries_print_mce_info(struct pt_regs *regs, struct rtas_error_log *errp) { const char *level, *sevstr; struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log; u8 error_type, err_sub_type; u64 addr; u8 initiator = rtas_error_initiator(errp); int disposition = rtas_error_disposition(errp); static const char * const initiators[] = { "Unknown", "CPU", "PCI", "ISA", "Memory", "Power Mgmt", }; static const char * const mc_err_types[] = { "UE", "SLB", "ERAT", "TLB", "D-Cache", "Unknown", "I-Cache", }; static const char * const mc_ue_types[] = { "Indeterminate", "Instruction fetch", "Page table walk ifetch", "Load/Store", "Page table walk Load/Store", }; /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ static const char * const mc_slb_types[] = { "Parity", "Multihit", "Indeterminate", }; /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ static const char * const mc_soft_types[] = { "Unknown", "Parity", "Multihit", "Indeterminate", }; if (!rtas_error_extended(errp)) { pr_err("Machine check interrupt: Missing extended error log\n"); return; } pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); if (pseries_log == NULL) return; mce_log = (struct pseries_mc_errorlog *)pseries_log->data; error_type = mce_log->error_type; err_sub_type = rtas_mc_error_sub_type(mce_log); switch (rtas_error_severity(errp)) { case RTAS_SEVERITY_NO_ERROR: level = KERN_INFO; sevstr = "Harmless"; break; case RTAS_SEVERITY_WARNING: level = KERN_WARNING; sevstr = ""; break; case RTAS_SEVERITY_ERROR: case RTAS_SEVERITY_ERROR_SYNC: level = KERN_ERR; sevstr = "Severe"; break; case RTAS_SEVERITY_FATAL: default: level = KERN_ERR; sevstr = "Fatal"; break; } #ifdef CONFIG_PPC_BOOK3S_64 /* Display faulty slb contents for SLB errors. */ if (error_type == MC_ERROR_TYPE_SLB) slb_dump_contents(local_paca->mce_faulty_slbs); #endif printk("%s%s Machine check interrupt [%s]\n", level, sevstr, disposition == RTAS_DISP_FULLY_RECOVERED ? 
"Recovered" : "Not recovered"); if (user_mode(regs)) { printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level, regs->nip, current->pid, current->comm); } else { printk("%s NIP [%016lx]: %pS\n", level, regs->nip, (void *)regs->nip); } printk("%s Initiator: %s\n", level, VAL_TO_STRING(initiators, initiator)); switch (error_type) { case MC_ERROR_TYPE_UE: printk("%s Error type: %s [%s]\n", level, VAL_TO_STRING(mc_err_types, error_type), VAL_TO_STRING(mc_ue_types, err_sub_type)); break; case MC_ERROR_TYPE_SLB: printk("%s Error type: %s [%s]\n", level, VAL_TO_STRING(mc_err_types, error_type), VAL_TO_STRING(mc_slb_types, err_sub_type)); break; case MC_ERROR_TYPE_ERAT: case MC_ERROR_TYPE_TLB: printk("%s Error type: %s [%s]\n", level, VAL_TO_STRING(mc_err_types, error_type), VAL_TO_STRING(mc_soft_types, err_sub_type)); break; default: printk("%s Error type: %s\n", level, VAL_TO_STRING(mc_err_types, error_type)); break; } addr = rtas_mc_get_effective_addr(mce_log); if (addr) printk("%s Effective address: %016llx\n", level, addr); }