Beispiel #1
0
/*
 * Interrupt handler for RTAS hotplug events.
 *
 * With ras_log_buf_lock held, issues the check-exception RTAS call with
 * ras_log_buf as the output buffer, looks up the hotplug section of the
 * resulting log and dispatches it: memory, CPU and PMEM events are queued
 * via queue_hotplug_event(); everything else is handed to log_error().
 *
 * @irq:    virtual IRQ number; translated with virq_to_hw() for the RTAS call.
 * @dev_id: unused.
 *
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_hp_errorlog *hp_elog;

	spin_lock(&ras_log_buf_lock);

	rtas_call(ras_check_exception_token, 6, 1, NULL,
		  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
		  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
		  rtas_get_error_log_max());

	pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
					   PSERIES_ELOG_SECT_ID_HOTPLUG);
	if (!pseries_log) {
		/*
		 * The log carries no hotplug section, so there is nothing
		 * to dispatch in the kernel. Hand the raw log over in the
		 * same way as events that are not handled here instead of
		 * dereferencing a NULL section pointer.
		 */
		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
		goto out_unlock;
	}

	hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;

	/*
	 * Since PCI hotplug is not currently supported on pseries, put PCI
	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
	 */
	if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
		queue_hotplug_event(hp_elog);
	else
		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);

out_unlock:
	spin_unlock(&ras_log_buf_lock);
	return IRQ_HANDLED;
}
Beispiel #2
0
/**
 * ioei_find_event - Find the data portion of an IO Event section in an
 *                   event log.
 * @elog: RTAS error/event log.
 *
 * Return:
 * 	pointer to the IO event section data. NULL if the log is not of type
 * 	RTAS_TYPE_IO or does not contain an IO Event section; a warning is
 * 	printed (once per message) in either case.
 */
static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
{
	struct pseries_errorlog *sect;

	/* We should only ever get called for io-event interrupts, but if
	 * we do get called for another type then something went wrong so
	 * make some noise about it.
	 * RTAS_TYPE_IO only exists in extended event log version 6 or later.
	 * No need to check event log version.
	 */
	if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) {
		/* Terminate the message with a newline like the one below. */
		printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d\n",
			    rtas_error_type(elog));
		return NULL;
	}

	sect = get_pseries_errorlog(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
	if (unlikely(!sect)) {
		printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
			    "log does not contain an IO Event section. "
			    "Could be a bug in system firmware!\n");
		return NULL;
	}
	return (struct pseries_io_event *) &sect->data;
}
Beispiel #3
0
/*
 * Act on the EPOW (environmental / power warning) section of an RTAS
 * error log.
 *
 * @log: RTAS error log to inspect. If it has no EPOW section the function
 *       returns without doing anything.
 *
 * The action code is the bottom 4 bits of the section's sensor value and
 * selects what happens: informational messages for warnings, a shutdown
 * handled by handle_system_shutdown(), or an orderly / immediate power off.
 * num_epow_events is decremented when a pending event is cleared
 * (EPOW_RESET) and incremented for every other action code.
 */
static void rtas_parse_epow_errlog(struct rtas_error_log *log)
{
	struct pseries_errorlog *pseries_log;
	struct epow_errorlog *epow_log;
	char action_code;
	char modifier;

	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
	if (pseries_log == NULL)
		return;

	epow_log = (struct epow_errorlog *)pseries_log->data;
	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */

	switch (action_code) {
	case EPOW_RESET:
		if (num_epow_events) {
			pr_info("Non critical power/cooling issue cleared\n");
			num_epow_events--;
		}
		break;

	case EPOW_WARN_COOLING:
		pr_info("Non-critical cooling issue detected. Check RTAS error"
			" log for details\n");
		break;

	case EPOW_WARN_POWER:
		pr_info("Non-critical power issue detected. Check RTAS error"
			" log for details\n");
		break;

	case EPOW_SYSTEM_SHUTDOWN:
		/*
		 * Pass only the masked modifier: the upper bits of
		 * event_modifier are not part of the modifier code.
		 */
		handle_system_shutdown(modifier);
		break;

	case EPOW_SYSTEM_HALT:
		pr_emerg("Critical power/cooling issue detected. Check RTAS"
			 " error log for details. Powering off.\n");
		orderly_poweroff(true);
		break;

	case EPOW_MAIN_ENCLOSURE:
	case EPOW_POWER_OFF:
		pr_emerg("System about to lose power. Check RTAS error log "
			 " for details. Powering off immediately.\n");
		emergency_sync();
		kernel_power_off();
		break;

	default:
		pr_err("Unknown power/cooling event (action code  = %d)\n",
			action_code);
	}

	/* Increment epow events counter variable */
	if (action_code != EPOW_RESET)
		num_epow_events++;
}
Beispiel #4
0
/*
 * Act on the EPOW (environmental / power warning) section of an RTAS
 * error log.
 *
 * @log: RTAS error log to inspect. If it has no EPOW section the function
 *       returns without doing anything.
 *
 * The action code is the bottom 4 bits of the section's sensor value and
 * selects what happens: log messages for cleared / non-critical events,
 * a shutdown handled by handle_system_shutdown(), or an orderly /
 * immediate power off.
 */
void rtas_parse_epow_errlog(struct rtas_error_log *log)
{
    struct pseries_errorlog *pseries_log;
    struct epow_errorlog *epow_log;
    char action_code;
    char modifier;

    pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
    if (pseries_log == NULL)
        return;

    epow_log = (struct epow_errorlog *)pseries_log->data;
    action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
    modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */

    /* Every message is newline-terminated so each one is a complete record. */
    switch (action_code) {
    case EPOW_RESET:
        pr_err("Non critical power or cooling issue cleared\n");
        break;

    case EPOW_WARN_COOLING:
        pr_err("Non critical cooling issue reported by firmware\n");
        pr_err("Check RTAS error log for details\n");
        break;

    case EPOW_WARN_POWER:
        pr_err("Non critical power issue reported by firmware\n");
        pr_err("Check RTAS error log for details\n");
        break;

    case EPOW_SYSTEM_SHUTDOWN:
        /*
         * Pass only the masked modifier: the upper bits of
         * event_modifier are not part of the modifier code.
         */
        handle_system_shutdown(modifier);
        break;

    case EPOW_SYSTEM_HALT:
        pr_emerg("Firmware initiated power off\n");
        orderly_poweroff(1);
        break;

    case EPOW_MAIN_ENCLOSURE:
    case EPOW_POWER_OFF:
        pr_emerg("Critical power/cooling issue reported by firmware\n");
        pr_emerg("Check RTAS error log for details\n");
        pr_emerg("Immediate power off\n");
        emergency_sync();
        kernel_power_off();
        break;

    default:
        pr_err("Unknown power/cooling event (action code %d)\n",
               action_code);
    }
}
Beispiel #5
0
/*
 * Try to recover from a machine check described by an RTAS error log.
 *
 * @errp: RTAS error log for the machine check.
 *
 * Returns the disposition of the error. This is the disposition recorded
 * in the log, unless an SLB or ERAT error that was reported as not
 * recovered has been handled here (CONFIG_PPC_BOOK3S_64 only), in which
 * case the log is marked recovered and RTAS_DISP_FULLY_RECOVERED is
 * returned.
 */
static int mce_handle_error(struct rtas_error_log *errp)
{
	int disposition = rtas_error_disposition(errp);
	struct pseries_errorlog *sect;
	struct pseries_mc_errorlog *mc;
	u8 type;

	/* Without an extended log there is no MCE section to look at. */
	if (!rtas_error_extended(errp))
		return disposition;

	sect = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (!sect)
		return disposition;

	mc = (struct pseries_mc_errorlog *)sect->data;
	type = mc->error_type;

#ifdef CONFIG_PPC_BOOK3S_64
	if (disposition == RTAS_DISP_NOT_RECOVERED &&
	    (type == MC_ERROR_TYPE_SLB || type == MC_ERROR_TYPE_ERAT)) {
		/*
		 * Keep a copy of the old SLB contents in the paca before
		 * flushing so it can be printed once we are in virtual
		 * mode. Reading the SLB may itself raise another MCE if
		 * the entry being read has a parity error, so only save
		 * at the first level of MCE recursion.
		 */
		if (local_paca->in_mce == 1)
			slb_save_contents(local_paca->mce_faulty_slbs);
		flush_and_reload_slb();
		disposition = RTAS_DISP_FULLY_RECOVERED;
		rtas_set_disposition_recovered(errp);
	}
#endif

	return disposition;
}
Beispiel #6
0
/*
 * Hand an uncorrectable-error (UE) machine check to the memory failure
 * handling code.
 *
 * @regs: register state at the time of the machine check.
 * @errp: RTAS error log for the machine check.
 *
 * Does nothing unless the log is an extended log that contains an MCE
 * section whose error type is MC_ERROR_TYPE_UE.
 */
static void pseries_process_ue(struct pt_regs *regs,
			       struct rtas_error_log *errp)
{
	struct pseries_errorlog *sect;
	struct pseries_mc_errorlog *mc;

	if (!rtas_error_extended(errp))
		return;

	sect = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (sect) {
		mc = (struct pseries_mc_errorlog *)sect->data;

		/* Only UE events are forwarded; other types are ignored. */
		if (mc->error_type == MC_ERROR_TYPE_UE)
			pseries_do_memory_failure(regs, mc);
	}
}
Beispiel #7
0
/*
 * Print a human-readable description of a machine check to the kernel log.
 *
 * @regs: register state at the time of the machine check; supplies the NIP
 *        and decides whether user or kernel context is reported.
 * @errp: RTAS error log for the machine check.
 *
 * If the log is not an extended log a single error line is printed. If it
 * has no MCE section nothing is printed. Otherwise the severity, recovery
 * status, NIP, initiator, error type (with sub-type for UE, SLB, ERAT and
 * TLB errors) and, when non-zero, the effective address are printed at a
 * log level derived from the severity.
 */
static void pseries_print_mce_info(struct pt_regs *regs,
				   struct rtas_error_log *errp)
{
	const char *lvl, *severity;
	const char *outcome, *type_name;
	const char *sub_name = "";
	struct pseries_errorlog *sect;
	struct pseries_mc_errorlog *mc;
	u8 type, sub_type;
	u64 ea;
	int show_sub = 1;
	u8 initiator = rtas_error_initiator(errp);
	int disposition = rtas_error_disposition(errp);

	/* Lookup tables, indexed by the raw values found in the error log. */
	static const char * const initiators[] = {
		"Unknown",
		"CPU",
		"PCI",
		"ISA",
		"Memory",
		"Power Mgmt",
	};
	static const char * const mc_err_types[] = {
		"UE",
		"SLB",
		"ERAT",
		"TLB",
		"D-Cache",
		"Unknown",
		"I-Cache",
	};
	static const char * const mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};

	/* Valid SLB sub-error values: 0x0, 0x1, 0x2 */
	static const char * const mc_slb_types[] = {
		"Parity",
		"Multihit",
		"Indeterminate",
	};

	/* Valid TLB and ERAT sub-error values: 0x1, 0x2, 0x3 */
	static const char * const mc_soft_types[] = {
		"Unknown",
		"Parity",
		"Multihit",
		"Indeterminate",
	};

	if (!rtas_error_extended(errp)) {
		pr_err("Machine check interrupt: Missing extended error log\n");
		return;
	}

	sect = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (!sect)
		return;

	mc = (struct pseries_mc_errorlog *)sect->data;
	type = mc->error_type;
	sub_type = rtas_mc_error_sub_type(mc);

	/* Start from the fatal defaults; milder severities override them. */
	lvl = KERN_ERR;
	severity = "Fatal";
	switch (rtas_error_severity(errp)) {
	case RTAS_SEVERITY_NO_ERROR:
		lvl = KERN_INFO;
		severity = "Harmless";
		break;
	case RTAS_SEVERITY_WARNING:
		lvl = KERN_WARNING;
		severity = "";
		break;
	case RTAS_SEVERITY_ERROR:
	case RTAS_SEVERITY_ERROR_SYNC:
		severity = "Severe";
		break;
	case RTAS_SEVERITY_FATAL:
	default:
		break;
	}

#ifdef CONFIG_PPC_BOOK3S_64
	/* For SLB errors, dump the SLB contents recorded in the paca. */
	if (type == MC_ERROR_TYPE_SLB)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif

	if (disposition == RTAS_DISP_FULLY_RECOVERED)
		outcome = "Recovered";
	else
		outcome = "Not recovered";
	printk("%s%s Machine check interrupt [%s]\n", lvl, severity, outcome);

	if (!user_mode(regs)) {
		printk("%s  NIP [%016lx]: %pS\n", lvl, regs->nip,
		       (void *)regs->nip);
	} else {
		printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", lvl,
		       regs->nip, current->pid, current->comm);
	}

	printk("%s  Initiator: %s\n", lvl,
	       VAL_TO_STRING(initiators, initiator));

	/* Pick the sub-type table that matches the error type, if any. */
	type_name = VAL_TO_STRING(mc_err_types, type);
	switch (type) {
	case MC_ERROR_TYPE_UE:
		sub_name = VAL_TO_STRING(mc_ue_types, sub_type);
		break;
	case MC_ERROR_TYPE_SLB:
		sub_name = VAL_TO_STRING(mc_slb_types, sub_type);
		break;
	case MC_ERROR_TYPE_ERAT:
	case MC_ERROR_TYPE_TLB:
		sub_name = VAL_TO_STRING(mc_soft_types, sub_type);
		break;
	default:
		show_sub = 0;
		break;
	}

	if (show_sub)
		printk("%s  Error type: %s [%s]\n", lvl, type_name, sub_name);
	else
		printk("%s  Error type: %s\n", lvl, type_name);

	/* The effective address is only reported when it is non-zero. */
	ea = rtas_mc_get_effective_addr(mc);
	if (ea != 0)
		printk("%s    Effective address: %016llx\n", lvl, ea);
}