Esempio n. 1
0
/**
 * eeh_reset_device - Perform actual reset of a pci slot
 * @edev: PE associated EEH device
 * @bus: PCI bus corresponding to the isolcated slot
 *
 * This routine must be called to do reset on the indicated PE.
 * During the reset, udev might be invoked because those affected
 * PCI devices will be removed and then added.
 */
static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
{
	struct device_node *dn;
	int cnt, rc;

	/* pcibios will clear the counter; save the value */
	cnt = edev->freeze_count;

	if (bus)
		pcibios_remove_pci_devices(bus);

	/* Reset the pci controller. (Asserts RST#; resets config space).
	 * Reconfigure bridges and devices. Don't try to bring the system
	 * up if the reset failed for some reason.
	 */
	rc = eeh_reset_pe(edev);
	if (rc)
		return rc;

	/* Walk over all functions on this device. */
	dn = eeh_dev_to_of_node(edev);
	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
		dn = dn->parent->child;

	while (dn) {
		struct eeh_dev *pedev = of_node_to_eeh_dev(dn);

		/* On Power4, always true because eeh_pe_config_addr=0 */
		if (edev->pe_config_addr == pedev->pe_config_addr) {
			eeh_ops->configure_bridge(dn);
			eeh_restore_bars(pedev);
 		}
		dn = dn->sibling;
	}

	/* Give the system 5 seconds to finish running the user-space
	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes, 
	 * this is a hack, but if we don't do this, and try to bring 
	 * the device up before the scripts have taken it down, 
	 * potentially weird things happen.
	 */
	if (bus) {
		ssleep(5);
		pcibios_add_pci_devices(bus);
	}
	edev->freeze_count = cnt;

	return 0;
}
Esempio n. 2
0
/* When a PCI device is isolated from the bus, a subsequent MMIO read is
 * required for the kernel EEH mechanisms to notice. As the Solarflare driver
 * was written to minimise MMIO read (for latency) then a periodic call to check
 * the EEH status of the device is required so that device recovery can happen
 * in a timely fashion.
 */
static void siena_monitor(struct efx_nic *efx)
{
	struct eeh_dev *eehdev =
		of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev));

	eeh_dev_check_failure(eehdev);
}
Esempio n. 3
0
int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
{
	int returnval = -1;
	unsigned long buid, addr;
	int ret;

	if (!pdn)
		return PCIBIOS_DEVICE_NOT_FOUND;
	if (!config_access_valid(pdn, where))
		return PCIBIOS_BAD_REGISTER_NUMBER;

	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
	buid = pdn->phb->buid;
	if (buid) {
		ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
				addr, BUID_HI(buid), BUID_LO(buid), size);
	} else {
		ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
	}
	*val = returnval;

	if (ret)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (returnval == EEH_IO_ERROR_VALUE(size) &&
	    eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node)))
		return PCIBIOS_DEVICE_NOT_FOUND;

	return PCIBIOS_SUCCESSFUL;
}
/**
 * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
 * @dn: PE associated device node
 *
 * The function will be called to reconfigure the bridges included
 * in the specified PE so that the mulfunctional PE would be recovered
 * again.
 */
static int pseries_eeh_configure_bridge(struct device_node *dn)
{
	struct eeh_dev *edev;
	int config_addr;
	int ret;

	/* Figure out the PE address */
	edev = of_node_to_eeh_dev(dn);
	config_addr = edev->config_addr;
	if (edev->pe_config_addr)
		config_addr = edev->pe_config_addr;

	/* Use new configure-pe function, if supported */
	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
				config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid));
	} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
		ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
				config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid));
	} else {
		return -EFAULT;
	}

	if (ret)
		pr_warning("%s: Unable to configure bridge %d for %s\n",
			__func__, ret, dn->full_name);

	return ret;
}
/**
 * pseries_eeh_get_log - Retrieve error log
 * @dn: device node
 * @severity: temporary or permanent error log
 * @drv_log: driver log to be combined with retrieved error log
 * @len: length of driver log
 *
 * Retrieve the temporary or permanent error from the PE.
 * Actually, the error will be retrieved through the dedicated
 * RTAS call.
 */
static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
{
	struct eeh_dev *edev;
	int config_addr;
	unsigned long flags;
	int ret;

	edev = of_node_to_eeh_dev(dn);
	spin_lock_irqsave(&slot_errbuf_lock, flags);
	memset(slot_errbuf, 0, eeh_error_buf_size);

	/* Figure out the PE address */
	config_addr = edev->config_addr;
	if (edev->pe_config_addr)
		config_addr = edev->pe_config_addr;

	ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
			BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid),
			virt_to_phys(drv_log), len,
			virt_to_phys(slot_errbuf), eeh_error_buf_size,
			severity);
	if (!ret)
		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
	spin_unlock_irqrestore(&slot_errbuf_lock, flags);

	return ret;
}
/**
 * pseries_eeh_reset - Reset the specified PE
 * @dn: PE associated device node
 * @option: reset option
 *
 * Reset the specified PE
 */
static int pseries_eeh_reset(struct device_node *dn, int option)
{
	struct eeh_dev *edev;
	int config_addr;
	int ret;

	/* Figure out PE address */
	edev = of_node_to_eeh_dev(dn);
	config_addr = edev->config_addr;
	if (edev->pe_config_addr)
		config_addr = edev->pe_config_addr;

	/* Reset PE through RTAS call */
	ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
			config_addr, BUID_HI(edev->phb->buid),
			BUID_LO(edev->phb->buid), option);

	/* If fundamental-reset not supported, try hot-reset */
	if (option == EEH_RESET_FUNDAMENTAL &&
	    ret == -8) {
		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
				config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid), EEH_RESET_HOT);
	}

	return ret;
}
Esempio n. 7
0
static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
{
	struct device_node *dn;
	int cnt, rc;

	
	cnt = edev->freeze_count;

	if (bus)
		pcibios_remove_pci_devices(bus);

	rc = eeh_reset_pe(edev);
	if (rc)
		return rc;

	
	dn = eeh_dev_to_of_node(edev);
	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
		dn = dn->parent->child;

	while (dn) {
		struct eeh_dev *pedev = of_node_to_eeh_dev(dn);

		
		if (edev->pe_config_addr == pedev->pe_config_addr) {
			eeh_ops->configure_bridge(dn);
			eeh_restore_bars(pedev);
 		}
		dn = dn->sibling;
	}

	if (bus) {
		ssleep(5);
		pcibios_add_pci_devices(bus);
	}
	edev->freeze_count = cnt;

	return 0;
}
/**
 * pseries_eeh_get_pe_addr - Retrieve PE address
 * @dn: device node
 *
 * Retrieve the assocated PE address. Actually, there're 2 RTAS
 * function calls dedicated for the purpose. We need implement
 * it through the new function and then the old one. Besides,
 * you should make sure the config address is figured out from
 * FDT node before calling the function.
 *
 * It's notable that zero'ed return value means invalid PE config
 * address.
 */
static int pseries_eeh_get_pe_addr(struct device_node *dn)
{
	struct eeh_dev *edev;
	int ret = 0;
	int rets[3];

	edev = of_node_to_eeh_dev(dn);

	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
		/*
		 * First of all, we need to make sure there has one PE
		 * associated with the device. Otherwise, PE address is
		 * meaningless.
		 */
		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				edev->config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid), 1);
		if (ret || (rets[0] == 0))
			return 0;

		/* Retrieve the associated PE config address */
		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				edev->config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid), 0);
		if (ret) {
			pr_warning("%s: Failed to get PE address for %s\n",
				__func__, dn->full_name);
			return 0;
		}

		return rets[0];
	}

	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
				edev->config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid), 0);
		if (ret) {
			pr_warning("%s: Failed to get PE address for %s\n",
				__func__, dn->full_name);
			return 0;
		}

		return rets[0];
	}

	return ret;
}
/**
 * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
 * @dn: device node
 * @option: operation to be issued
 *
 * The function is used to control the EEH functionality globally.
 * Currently, following options are support according to PAPR:
 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
 */
static int pseries_eeh_set_option(struct device_node *dn, int option)
{
	int ret = 0;
	struct eeh_dev *edev;
	const u32 *reg;
	int config_addr;

	edev = of_node_to_eeh_dev(dn);

	/*
	 * When we're enabling or disabling EEH functioality on
	 * the particular PE, the PE config address is possibly
	 * unavailable. Therefore, we have to figure it out from
	 * the FDT node.
	 */
	switch (option) {
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
		reg = of_get_property(dn, "reg", NULL);
		config_addr = reg[0];
		break;

	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		config_addr = edev->config_addr;
		if (edev->pe_config_addr)
			config_addr = edev->pe_config_addr;
		break;

	default:
		pr_err("%s: Invalid option %d\n",
			__func__, option);
		return -EINVAL;
	}

	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
			config_addr, BUID_HI(edev->phb->buid),
			BUID_LO(edev->phb->buid), option);

	return ret;
}
/**
 * pseries_eeh_of_probe - EEH probe on the given device
 * @dn: OF node
 * @flag: Unused
 *
 * When EEH module is installed during system boot, all PCI devices
 * are checked one by one to see if it supports EEH. The function
 * is introduced for the purpose.
 */
static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
{
	struct eeh_dev *edev;
	struct eeh_pe pe;
	const u32 *class_code, *vendor_id, *device_id;
	const u32 *regs;
	int enable = 0;
	int ret;

	/* Retrieve OF node and eeh device */
	edev = of_node_to_eeh_dev(dn);
	if (!of_device_is_available(dn))
		return NULL;

	/* Retrieve class/vendor/device IDs */
	class_code = of_get_property(dn, "class-code", NULL);
	vendor_id  = of_get_property(dn, "vendor-id", NULL);
	device_id  = of_get_property(dn, "device-id", NULL);

	/* Skip for bad OF node or PCI-ISA bridge */
	if (!class_code || !vendor_id || !device_id)
		return NULL;
	if (dn->type && !strcmp(dn->type, "isa"))
		return NULL;

	/* Update class code and mode of eeh device */
	edev->class_code = *class_code;
	edev->mode = 0;

	/* Retrieve the device address */
	regs = of_get_property(dn, "reg", NULL);
	if (!regs) {
		pr_warning("%s: OF node property %s::reg not found\n",
			__func__, dn->full_name);
		return NULL;
	}

	/* Initialize the fake PE */
	memset(&pe, 0, sizeof(struct eeh_pe));
	pe.phb = edev->phb;
	pe.config_addr = regs[0];

	/* Enable EEH on the device */
	ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
	if (!ret) {
		edev->config_addr = regs[0];
		/* Retrieve PE address */
		edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
		pe.addr = edev->pe_config_addr;

		/* Some older systems (Power4) allow the ibm,set-eeh-option
		 * call to succeed even on nodes where EEH is not supported.
		 * Verify support explicitly.
		 */
		ret = eeh_ops->get_state(&pe, NULL);
		if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
			enable = 1;

		if (enable) {
			eeh_subsystem_enabled = 1;
			eeh_add_to_parent_pe(edev);

			pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
				__func__, dn->full_name, pe.phb->global_number,
				pe.addr, pe.config_addr);
		} else if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
			   (of_node_to_eeh_dev(dn->parent))->pe) {
			/* This device doesn't support EEH, but it may have an
			 * EEH parent, in which case we mark it as supported.
			 */
			edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
			edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr;
			eeh_add_to_parent_pe(edev);
		}
	}

	/* Save memory bars */
	eeh_save_bars(edev);

	return NULL;
}
Esempio n. 11
0
struct eeh_dev *handle_eeh_events(struct eeh_event *event)
{
	struct device_node *frozen_dn;
	struct eeh_dev *frozen_edev;
	struct pci_bus *frozen_bus;
	int rc = 0;
	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
	const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;

	frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
	if (!frozen_dn) {
		location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
		location = location ? location : "unknown";
		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
		                "for location=%s pci addr=%s\n",
			location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
		return NULL;
	}

	frozen_bus = pcibios_find_pci_bus(frozen_dn);
	location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
	location = location ? location : "unknown";

	if (!frozen_bus)
		frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);

	if (!frozen_bus) {
		printk(KERN_ERR "EEH: Cannot find PCI bus "
		        "for location=%s dn=%s\n",
		        location, frozen_dn->full_name);
		return NULL;
	}

	frozen_edev = of_node_to_eeh_dev(frozen_dn);
	frozen_edev->freeze_count++;
	pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
	drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));

	if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
		goto excess_failures;

	printk(KERN_WARNING
	   "EEH: This PCI device has failed %d times in the last hour:\n",
		frozen_edev->freeze_count);

	if (frozen_edev->pdev) {
		bus_pci_str = pci_name(frozen_edev->pdev);
		bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
		printk(KERN_WARNING
			"EEH: Bus location=%s driver=%s pci addr=%s\n",
			location, bus_drv_str, bus_pci_str);
	}

	printk(KERN_WARNING
		"EEH: Device location=%s driver=%s pci addr=%s\n",
		location, drv_str, pci_str);

	pci_walk_bus(frozen_bus, eeh_report_error, &result);

	rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
		printk(KERN_WARNING "EEH: Permanent failure\n");
		goto hard_fail;
	}

	eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);

	if (result == PCI_ERS_RESULT_NONE) {
		rc = eeh_reset_device(frozen_edev, frozen_bus);
		if (rc) {
			printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
			goto hard_fail;
		}
	}

	
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);

		if (rc < 0)
			goto hard_fail;
		if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			result = PCI_ERS_RESULT_NONE;
			pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
		}
	}

	
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);

		if (rc < 0)
			goto hard_fail;
		if (rc)
			result = PCI_ERS_RESULT_NEED_RESET;
		else
			result = PCI_ERS_RESULT_RECOVERED;
	}

	
	if (result == PCI_ERS_RESULT_DISCONNECT) {
		printk(KERN_WARNING "EEH: Device driver gave up\n");
		goto hard_fail;
	}

	
	if (result == PCI_ERS_RESULT_NEED_RESET) {
		rc = eeh_reset_device(frozen_edev, NULL);
		if (rc) {
			printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
			goto hard_fail;
		}
		result = PCI_ERS_RESULT_NONE;
		pci_walk_bus(frozen_bus, eeh_report_reset, &result);
	}

	
	if ((result != PCI_ERS_RESULT_RECOVERED) &&
	    (result != PCI_ERS_RESULT_NONE)) {
		printk(KERN_WARNING "EEH: Not recovered\n");
		goto hard_fail;
	}

	
	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);

	return frozen_edev;
	
excess_failures:
	printk(KERN_ERR
	   "EEH: PCI device at location=%s driver=%s pci addr=%s\n"
		"has failed %d times in the last hour "
		"and has been permanently disabled.\n"
		"Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str, frozen_edev->freeze_count);
	goto perm_error;

hard_fail:
	printk(KERN_ERR
	   "EEH: Unable to recover from failure of PCI device "
	   "at location=%s driver=%s pci addr=%s\n"
	   "Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str);

perm_error:
	eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);

	
	pci_walk_bus(frozen_bus, eeh_report_failure, NULL);

	
	pcibios_remove_pci_devices(frozen_bus);

	return NULL;
}
/**
 * pseries_eeh_get_state - Retrieve PE state
 * @dn: PE associated device node
 * @state: return value
 *
 * Retrieve the state of the specified PE. On RTAS compliant
 * pseries platform, there already has one dedicated RTAS function
 * for the purpose. It's notable that the associated PE config address
 * might be ready when calling the function. Therefore, endeavour to
 * use the PE config address if possible. Further more, there're 2
 * RTAS calls for the purpose, we need to try the new one and back
 * to the old one if the new one couldn't work properly.
 */
static int pseries_eeh_get_state(struct device_node *dn, int *state)
{
	struct eeh_dev *edev;
	int config_addr;
	int ret;
	int rets[4];
	int result;

	/* Figure out PE config address if possible */
	edev = of_node_to_eeh_dev(dn);
	config_addr = edev->config_addr;
	if (edev->pe_config_addr)
		config_addr = edev->pe_config_addr;

	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
		ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
				config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid));
	} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
		/* Fake PE unavailable info */
		rets[2] = 0;
		ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
				config_addr, BUID_HI(edev->phb->buid),
				BUID_LO(edev->phb->buid));
	} else {
		return EEH_STATE_NOT_SUPPORT;
	}

	if (ret)
		return ret;

	/* Parse the result out */
	result = 0;
	if (rets[1]) {
		switch(rets[0]) {
		case 0:
			result &= ~EEH_STATE_RESET_ACTIVE;
			result |= EEH_STATE_MMIO_ACTIVE;
			result |= EEH_STATE_DMA_ACTIVE;
			break;
		case 1:
			result |= EEH_STATE_RESET_ACTIVE;
			result |= EEH_STATE_MMIO_ACTIVE;
			result |= EEH_STATE_DMA_ACTIVE;
			break;
		case 2:
			result &= ~EEH_STATE_RESET_ACTIVE;
			result &= ~EEH_STATE_MMIO_ACTIVE;
			result &= ~EEH_STATE_DMA_ACTIVE;
			break;
		case 4:
			result &= ~EEH_STATE_RESET_ACTIVE;
			result &= ~EEH_STATE_MMIO_ACTIVE;
			result &= ~EEH_STATE_DMA_ACTIVE;
			result |= EEH_STATE_MMIO_ENABLED;
			break;
		case 5:
			if (rets[2]) {
				if (state) *state = rets[2];
				result = EEH_STATE_UNAVAILABLE;
			} else {
				result = EEH_STATE_NOT_SUPPORT;
			}
		default:
			result = EEH_STATE_NOT_SUPPORT;
		}
	} else {
		result = EEH_STATE_NOT_SUPPORT;
	}

	return result;
}
Esempio n. 13
0
/**
 * eeh_handle_event - Reset a PCI device after hard lockup.
 * @event: EEH event
 *
 * While PHB detects address or data parity errors on particular PCI
 * slot, the associated PE will be frozen. Besides, DMA's occurring
 * to wild addresses (which usually happen due to bugs in device
 * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
 * #PERR or other misc PCI-related errors also can trigger EEH errors.
 *
 * Recovery process consists of unplugging the device driver (which
 * generated hotplug events to userspace), then issuing a PCI #RST to
 * the device, then reconfiguring the PCI config space for all bridges
 * & devices under this slot, and then finally restarting the device
 * drivers (which cause a second set of hotplug events to go out to
 * userspace).
 */
struct eeh_dev *handle_eeh_events(struct eeh_event *event)
{
	struct device_node *frozen_dn;
	struct eeh_dev *frozen_edev;
	struct pci_bus *frozen_bus;
	int rc = 0;
	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
	const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;

	frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
	if (!frozen_dn) {
		location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
		location = location ? location : "unknown";
		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
		                "for location=%s pci addr=%s\n",
			location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
		return NULL;
	}

	frozen_bus = pcibios_find_pci_bus(frozen_dn);
	location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
	location = location ? location : "unknown";

	/* There are two different styles for coming up with the PE.
	 * In the old style, it was the highest EEH-capable device
	 * which was always an EADS pci bridge.  In the new style,
	 * there might not be any EADS bridges, and even when there are,
	 * the firmware marks them as "EEH incapable". So another
	 * two-step is needed to find the pci bus..
	 */
	if (!frozen_bus)
		frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);

	if (!frozen_bus) {
		printk(KERN_ERR "EEH: Cannot find PCI bus "
		        "for location=%s dn=%s\n",
		        location, frozen_dn->full_name);
		return NULL;
	}

	frozen_edev = of_node_to_eeh_dev(frozen_dn);
	frozen_edev->freeze_count++;
	pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
	drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));

	if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
		goto excess_failures;

	printk(KERN_WARNING
	   "EEH: This PCI device has failed %d times in the last hour:\n",
		frozen_edev->freeze_count);

	if (frozen_edev->pdev) {
		bus_pci_str = pci_name(frozen_edev->pdev);
		bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
		printk(KERN_WARNING
			"EEH: Bus location=%s driver=%s pci addr=%s\n",
			location, bus_drv_str, bus_pci_str);
	}

	printk(KERN_WARNING
		"EEH: Device location=%s driver=%s pci addr=%s\n",
		location, drv_str, pci_str);

	/* Walk the various device drivers attached to this slot through
	 * a reset sequence, giving each an opportunity to do what it needs
	 * to accomplish the reset.  Each child gets a report of the
	 * status ... if any child can't handle the reset, then the entire
	 * slot is dlpar removed and added.
	 */
	pci_walk_bus(frozen_bus, eeh_report_error, &result);

	/* Get the current PCI slot state. This can take a long time,
	 * sometimes over 3 seconds for certain systems.
	 */
	rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
		printk(KERN_WARNING "EEH: Permanent failure\n");
		goto hard_fail;
	}

	/* Since rtas may enable MMIO when posting the error log,
	 * don't post the error log until after all dev drivers
	 * have been informed.
	 */
	eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);

	/* If all device drivers were EEH-unaware, then shut
	 * down all of the device drivers, and hope they
	 * go down willingly, without panicing the system.
	 */
	if (result == PCI_ERS_RESULT_NONE) {
		rc = eeh_reset_device(frozen_edev, frozen_bus);
		if (rc) {
			printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
			goto hard_fail;
		}
	}

	/* If all devices reported they can proceed, then re-enable MMIO */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);

		if (rc < 0)
			goto hard_fail;
		if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			result = PCI_ERS_RESULT_NONE;
			pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
		}
	}

	/* If all devices reported they can proceed, then re-enable DMA */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);

		if (rc < 0)
			goto hard_fail;
		if (rc)
			result = PCI_ERS_RESULT_NEED_RESET;
		else
			result = PCI_ERS_RESULT_RECOVERED;
	}

	/* If any device has a hard failure, then shut off everything. */
	if (result == PCI_ERS_RESULT_DISCONNECT) {
		printk(KERN_WARNING "EEH: Device driver gave up\n");
		goto hard_fail;
	}

	/* If any device called out for a reset, then reset the slot */
	if (result == PCI_ERS_RESULT_NEED_RESET) {
		rc = eeh_reset_device(frozen_edev, NULL);
		if (rc) {
			printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
			goto hard_fail;
		}
		result = PCI_ERS_RESULT_NONE;
		pci_walk_bus(frozen_bus, eeh_report_reset, &result);
	}

	/* All devices should claim they have recovered by now. */
	if ((result != PCI_ERS_RESULT_RECOVERED) &&
	    (result != PCI_ERS_RESULT_NONE)) {
		printk(KERN_WARNING "EEH: Not recovered\n");
		goto hard_fail;
	}

	/* Tell all device drivers that they can resume operations */
	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);

	return frozen_edev;
	
excess_failures:
	/*
	 * About 90% of all real-life EEH failures in the field
	 * are due to poorly seated PCI cards. Only 10% or so are
	 * due to actual, failed cards.
	 */
	printk(KERN_ERR
	   "EEH: PCI device at location=%s driver=%s pci addr=%s\n"
		"has failed %d times in the last hour "
		"and has been permanently disabled.\n"
		"Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str, frozen_edev->freeze_count);
	goto perm_error;

hard_fail:
	printk(KERN_ERR
	   "EEH: Unable to recover from failure of PCI device "
	   "at location=%s driver=%s pci addr=%s\n"
	   "Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str);

perm_error:
	eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);

	/* Notify all devices that they're about to go down. */
	pci_walk_bus(frozen_bus, eeh_report_failure, NULL);

	/* Shut down the device drivers for good. */
	pcibios_remove_pci_devices(frozen_bus);

	return NULL;
}