/* * Get the error information for errors coming through the * FWNMI vectors. The pt_regs' r3 will be updated to reflect * the actual r3 if possible, and a ptr to the error log entry * will be returned if found. * * Use one buffer mce_data_buf per cpu to store RTAS error. * * The mce_data_buf does not have any locks or protection around it, * if a second machine check comes in, or a system reset is done * before we have logged the error, then we will get corruption in the * error log. This is preferable over holding off on calling * ibm,nmi-interlock which would result in us checkstopping if a * second machine check did come in. */ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { unsigned long *savep; struct rtas_error_log *h; /* Mask top two bits */ regs->gpr[3] &= ~(0x3UL << 62); if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); return NULL; } savep = __va(regs->gpr[3]); regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ h = (struct rtas_error_log *)&savep[1]; /* Use the per cpu buffer from paca to store rtas error log */ memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); if (!rtas_error_extended(h)) { memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); } else { int len, error_log_length; error_log_length = 8 + rtas_error_extended_log_length(h); len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); memcpy(local_paca->mce_data_buf, h, len); } return (struct rtas_error_log *)local_paca->mce_data_buf; }
/* * Get the error information for errors coming through the * FWNMI vectors. The pt_regs' r3 will be updated to reflect * the actual r3 if possible, and a ptr to the error log entry * will be returned if found. * * If the RTAS error is not of the extended type, then we put it in a per * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf. * * The global_mce_data_buf does not have any locks or protection around it, * if a second machine check comes in, or a system reset is done * before we have logged the error, then we will get corruption in the * error log. This is preferable over holding off on calling * ibm,nmi-interlock which would result in us checkstopping if a * second machine check did come in. */ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { unsigned long *savep; struct rtas_error_log *h, *errhdr = NULL; /* Mask top two bits */ regs->gpr[3] &= ~(0x3UL << 62); if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); return NULL; } savep = __va(regs->gpr[3]); regs->gpr[3] = savep[0]; /* restore original r3 */ /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if (!rtas_error_extended(h)) { memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); } else { int len, error_log_length; error_log_length = 8 + rtas_error_extended_log_length(h); len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX); memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); memcpy(global_mce_data_buf, h, len); errhdr = (struct rtas_error_log *)global_mce_data_buf; } return errhdr; }
static int mce_handle_error(struct rtas_error_log *errp) { struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log; int disposition = rtas_error_disposition(errp); u8 error_type; if (!rtas_error_extended(errp)) goto out; pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); if (pseries_log == NULL) goto out; mce_log = (struct pseries_mc_errorlog *)pseries_log->data; error_type = mce_log->error_type; #ifdef CONFIG_PPC_BOOK3S_64 if (disposition == RTAS_DISP_NOT_RECOVERED) { switch (error_type) { case MC_ERROR_TYPE_SLB: case MC_ERROR_TYPE_ERAT: /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. * There are chances that we may hit MCE again if there * is a parity error on the SLB entry we trying to read * for saving. Hence limit the slb saving to single * level of recursion. */ if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); flush_and_reload_slb(); disposition = RTAS_DISP_FULLY_RECOVERED; rtas_set_disposition_recovered(errp); break; default: break; } } #endif out: return disposition; }
static void pseries_process_ue(struct pt_regs *regs, struct rtas_error_log *errp) { struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log; if (!rtas_error_extended(errp)) return; pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); if (!pseries_log) return; mce_log = (struct pseries_mc_errorlog *)pseries_log->data; if (mce_log->error_type == MC_ERROR_TYPE_UE) pseries_do_memory_failure(regs, mce_log); }
/*
 * Compute the number of bytes occupied by the RTAS error log in @buf.
 *
 * The result is the 8-byte fixed header plus the extended log length (when
 * the log is flagged as extended and that length is non-zero), capped at
 * rtas_error_log_max. rtas_error_log_max is looked up on first use if it
 * is still zero.
 */
static int log_rtas_len(char * buf)
{
	struct rtas_error_log *hdr = (struct rtas_error_log *)buf;
	uint32_t ext_len = rtas_error_extended_log_length(hdr);
	int total = 8;		/* rtas fixed header */

	/* extended header */
	if (rtas_error_extended(hdr) && ext_len)
		total += ext_len;

	if (rtas_error_log_max == 0)
		rtas_error_log_max = rtas_get_error_log_max();

	if (total > rtas_error_log_max)
		return rtas_error_log_max;

	return total;
}
static void pseries_print_mce_info(struct pt_regs *regs, struct rtas_error_log *errp) { const char *level, *sevstr; struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log; u8 error_type, err_sub_type; u64 addr; u8 initiator = rtas_error_initiator(errp); int disposition = rtas_error_disposition(errp); static const char * const initiators[] = { "Unknown", "CPU", "PCI", "ISA", "Memory", "Power Mgmt", }; static const char * const mc_err_types[] = { "UE", "SLB", "ERAT", "TLB", "D-Cache", "Unknown", "I-Cache", }; static const char * const mc_ue_types[] = { "Indeterminate", "Instruction fetch", "Page table walk ifetch", "Load/Store", "Page table walk Load/Store", }; /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ static const char * const mc_slb_types[] = { "Parity", "Multihit", "Indeterminate", }; /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ static const char * const mc_soft_types[] = { "Unknown", "Parity", "Multihit", "Indeterminate", }; if (!rtas_error_extended(errp)) { pr_err("Machine check interrupt: Missing extended error log\n"); return; } pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); if (pseries_log == NULL) return; mce_log = (struct pseries_mc_errorlog *)pseries_log->data; error_type = mce_log->error_type; err_sub_type = rtas_mc_error_sub_type(mce_log); switch (rtas_error_severity(errp)) { case RTAS_SEVERITY_NO_ERROR: level = KERN_INFO; sevstr = "Harmless"; break; case RTAS_SEVERITY_WARNING: level = KERN_WARNING; sevstr = ""; break; case RTAS_SEVERITY_ERROR: case RTAS_SEVERITY_ERROR_SYNC: level = KERN_ERR; sevstr = "Severe"; break; case RTAS_SEVERITY_FATAL: default: level = KERN_ERR; sevstr = "Fatal"; break; } #ifdef CONFIG_PPC_BOOK3S_64 /* Display faulty slb contents for SLB errors. */ if (error_type == MC_ERROR_TYPE_SLB) slb_dump_contents(local_paca->mce_faulty_slbs); #endif printk("%s%s Machine check interrupt [%s]\n", level, sevstr, disposition == RTAS_DISP_FULLY_RECOVERED ? 
"Recovered" : "Not recovered"); if (user_mode(regs)) { printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level, regs->nip, current->pid, current->comm); } else { printk("%s NIP [%016lx]: %pS\n", level, regs->nip, (void *)regs->nip); } printk("%s Initiator: %s\n", level, VAL_TO_STRING(initiators, initiator)); switch (error_type) { case MC_ERROR_TYPE_UE: printk("%s Error type: %s [%s]\n", level, VAL_TO_STRING(mc_err_types, error_type), VAL_TO_STRING(mc_ue_types, err_sub_type)); break; case MC_ERROR_TYPE_SLB: printk("%s Error type: %s [%s]\n", level, VAL_TO_STRING(mc_err_types, error_type), VAL_TO_STRING(mc_slb_types, err_sub_type)); break; case MC_ERROR_TYPE_ERAT: case MC_ERROR_TYPE_TLB: printk("%s Error type: %s [%s]\n", level, VAL_TO_STRING(mc_err_types, error_type), VAL_TO_STRING(mc_soft_types, err_sub_type)); break; default: printk("%s Error type: %s\n", level, VAL_TO_STRING(mc_err_types, error_type)); break; } addr = rtas_mc_get_effective_addr(mce_log); if (addr) printk("%s Effective address: %016llx\n", level, addr); }