Exemple #1
0
/*
 * Get the error information for errors coming through the
 * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 * the actual r3 if possible, and a ptr to the error log entry
 * will be returned if found.
 *
 * Use one buffer mce_data_buf per cpu to store RTAS error.
 *
 * The mce_data_buf does not have any locks or protection around it,
 * if a second machine check comes in, or a system reset is done
 * before we have logged the error, then we will get corruption in the
 * error log.  This is preferable over holding off on calling
 * ibm,nmi-interlock which would result in us checkstopping if a
 * second machine check did come in.
 */
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
	unsigned long *savep;
	struct rtas_error_log *h;

	/* Mask top two bits */
	regs->gpr[3] &= ~(0x3UL << 62);

	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
		return NULL;
	}

	savep = __va(regs->gpr[3]);
	regs->gpr[3] = be64_to_cpu(savep[0]);	/* restore original r3 */

	h = (struct rtas_error_log *)&savep[1];
	/* Use the per cpu buffer from paca to store rtas error log */
	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
	if (!rtas_error_extended(h)) {
		memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
	} else {
		int len, error_log_length;

		error_log_length = 8 + rtas_error_extended_log_length(h);
		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
		memcpy(local_paca->mce_data_buf, h, len);
	}

	return (struct rtas_error_log *)local_paca->mce_data_buf;
}
Exemple #2
0
/*
 * Get the error information for errors coming through the
 * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 * the actual r3 if possible, and a ptr to the error log entry
 * will be returned if found.
 *
 * If the RTAS error is not of the extended type, then we put it in a per
 * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf.
 *
 * The global_mce_data_buf does not have any locks or protection around it,
 * if a second machine check comes in, or a system reset is done
 * before we have logged the error, then we will get corruption in the
 * error log.  This is preferable over holding off on calling
 * ibm,nmi-interlock which would result in us checkstopping if a
 * second machine check did come in.
 */
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
	unsigned long *savep;
	struct rtas_error_log *h, *errhdr = NULL;

	/* Mask top two bits */
	regs->gpr[3] &= ~(0x3UL << 62);

	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
		return NULL;
	}

	savep = __va(regs->gpr[3]);
	regs->gpr[3] = savep[0];	/* restore original r3 */

	/* If it isn't an extended log we can use the per cpu 64bit buffer */
	h = (struct rtas_error_log *)&savep[1];
	if (!rtas_error_extended(h)) {
		memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64));
		errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf);
	} else {
		int len, error_log_length;

		error_log_length = 8 + rtas_error_extended_log_length(h);
		len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
		memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
		memcpy(global_mce_data_buf, h, len);
		errhdr = (struct rtas_error_log *)global_mce_data_buf;
	}

	return errhdr;
}
Exemple #3
0
static int mce_handle_error(struct rtas_error_log *errp)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;
	int disposition = rtas_error_disposition(errp);
	u8 error_type;

	if (!rtas_error_extended(errp))
		goto out;

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (pseries_log == NULL)
		goto out;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
	error_type = mce_log->error_type;

#ifdef CONFIG_PPC_BOOK3S_64
	if (disposition == RTAS_DISP_NOT_RECOVERED) {
		switch (error_type) {
		case	MC_ERROR_TYPE_SLB:
		case	MC_ERROR_TYPE_ERAT:
			/*
			 * Store the old slb content in paca before flushing.
			 * Print this when we go to virtual mode.
			 * There are chances that we may hit MCE again if there
			 * is a parity error on the SLB entry we trying to read
			 * for saving. Hence limit the slb saving to single
			 * level of recursion.
			 */
			if (local_paca->in_mce == 1)
				slb_save_contents(local_paca->mce_faulty_slbs);
			flush_and_reload_slb();
			disposition = RTAS_DISP_FULLY_RECOVERED;
			rtas_set_disposition_recovered(errp);
			break;
		default:
			break;
		}
	}
#endif

out:
	return disposition;
}
Exemple #4
0
static void pseries_process_ue(struct pt_regs *regs,
			       struct rtas_error_log *errp)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;

	if (!rtas_error_extended(errp))
		return;

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (!pseries_log)
		return;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;

	if (mce_log->error_type == MC_ERROR_TYPE_UE)
		pseries_do_memory_failure(regs, mce_log);
}
Exemple #5
0
static int log_rtas_len(char * buf)
{
	int len;
	struct rtas_error_log *err;
	uint32_t extended_log_length;

	/* rtas fixed header */
	len = 8;
	err = (struct rtas_error_log *)buf;
	extended_log_length = rtas_error_extended_log_length(err);
	if (rtas_error_extended(err) && extended_log_length) {

		/* extended header */
		len += extended_log_length;
	}

	if (rtas_error_log_max == 0)
		rtas_error_log_max = rtas_get_error_log_max();

	if (len > rtas_error_log_max)
		len = rtas_error_log_max;

	return len;
}
Exemple #6
0
static void pseries_print_mce_info(struct pt_regs *regs,
				   struct rtas_error_log *errp)
{
	const char *level, *sevstr;
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;
	u8 error_type, err_sub_type;
	u64 addr;
	u8 initiator = rtas_error_initiator(errp);
	int disposition = rtas_error_disposition(errp);

	static const char * const initiators[] = {
		"Unknown",
		"CPU",
		"PCI",
		"ISA",
		"Memory",
		"Power Mgmt",
	};
	static const char * const mc_err_types[] = {
		"UE",
		"SLB",
		"ERAT",
		"TLB",
		"D-Cache",
		"Unknown",
		"I-Cache",
	};
	static const char * const mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};

	/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
	static const char * const mc_slb_types[] = {
		"Parity",
		"Multihit",
		"Indeterminate",
	};

	/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
	static const char * const mc_soft_types[] = {
		"Unknown",
		"Parity",
		"Multihit",
		"Indeterminate",
	};

	if (!rtas_error_extended(errp)) {
		pr_err("Machine check interrupt: Missing extended error log\n");
		return;
	}

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (pseries_log == NULL)
		return;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;

	error_type = mce_log->error_type;
	err_sub_type = rtas_mc_error_sub_type(mce_log);

	switch (rtas_error_severity(errp)) {
	case RTAS_SEVERITY_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case RTAS_SEVERITY_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case RTAS_SEVERITY_ERROR:
	case RTAS_SEVERITY_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case RTAS_SEVERITY_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

#ifdef CONFIG_PPC_BOOK3S_64
	/* Display faulty slb contents for SLB errors. */
	if (error_type == MC_ERROR_TYPE_SLB)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       disposition == RTAS_DISP_FULLY_RECOVERED ?
	       "Recovered" : "Not recovered");
	if (user_mode(regs)) {
		printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
		       regs->nip, current->pid, current->comm);
	} else {
		printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
		       (void *)regs->nip);
	}
	printk("%s  Initiator: %s\n", level,
	       VAL_TO_STRING(initiators, initiator));

	switch (error_type) {
	case MC_ERROR_TYPE_UE:
		printk("%s  Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_ue_types, err_sub_type));
		break;
	case MC_ERROR_TYPE_SLB:
		printk("%s  Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_slb_types, err_sub_type));
		break;
	case MC_ERROR_TYPE_ERAT:
	case MC_ERROR_TYPE_TLB:
		printk("%s  Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_soft_types, err_sub_type));
		break;
	default:
		printk("%s  Error type: %s\n", level,
		       VAL_TO_STRING(mc_err_types, error_type));
		break;
	}

	addr = rtas_mc_get_effective_addr(mce_log);
	if (addr)
		printk("%s    Effective address: %016llx\n", level, addr);
}