/** * eeh_reset_device - Perform actual reset of a pci slot * @edev: PE associated EEH device * @bus: PCI bus corresponding to the isolcated slot * * This routine must be called to do reset on the indicated PE. * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) { struct device_node *dn; int cnt, rc; /* pcibios will clear the counter; save the value */ cnt = edev->freeze_count; if (bus) pcibios_remove_pci_devices(bus); /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. */ rc = eeh_reset_pe(edev); if (rc) return rc; /* Walk over all functions on this device. */ dn = eeh_dev_to_of_node(edev); if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) dn = dn->parent->child; while (dn) { struct eeh_dev *pedev = of_node_to_eeh_dev(dn); /* On Power4, always true because eeh_pe_config_addr=0 */ if (edev->pe_config_addr == pedev->pe_config_addr) { eeh_ops->configure_bridge(dn); eeh_restore_bars(pedev); } dn = dn->sibling; } /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, * this is a hack, but if we don't do this, and try to bring * the device up before the scripts have taken it down, * potentially weird things happen. */ if (bus) { ssleep(5); pcibios_add_pci_devices(bus); } edev->freeze_count = cnt; return 0; }
/* When a PCI device is isolated from the bus, a subsequent MMIO read is * required for the kernel EEH mechanisms to notice. As the Solarflare driver * was written to minimise MMIO read (for latency) then a periodic call to check * the EEH status of the device is required so that device recovery can happen * in a timely fashion. */ static void siena_monitor(struct efx_nic *efx) { struct eeh_dev *eehdev = of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev)); eeh_dev_check_failure(eehdev); }
int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { int returnval = -1; unsigned long buid, addr; int ret; if (!pdn) return PCIBIOS_DEVICE_NOT_FOUND; if (!config_access_valid(pdn, where)) return PCIBIOS_BAD_REGISTER_NUMBER; addr = rtas_config_addr(pdn->busno, pdn->devfn, where); buid = pdn->phb->buid; if (buid) { ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, addr, BUID_HI(buid), BUID_LO(buid), size); } else { ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); } *val = returnval; if (ret) return PCIBIOS_DEVICE_NOT_FOUND; if (returnval == EEH_IO_ERROR_VALUE(size) && eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node))) return PCIBIOS_DEVICE_NOT_FOUND; return PCIBIOS_SUCCESSFUL; }
/** * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE * @dn: PE associated device node * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered * again. */ static int pseries_eeh_configure_bridge(struct device_node *dn) { struct eeh_dev *edev; int config_addr; int ret; /* Figure out the PE address */ edev = of_node_to_eeh_dev(dn); config_addr = edev->config_addr; if (edev->pe_config_addr) config_addr = edev->pe_config_addr; /* Use new configure-pe function, if supported */ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_configure_pe, 3, 1, NULL, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid)); } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid)); } else { return -EFAULT; } if (ret) pr_warning("%s: Unable to configure bridge %d for %s\n", __func__, ret, dn->full_name); return ret; }
/** * pseries_eeh_get_log - Retrieve error log * @dn: device node * @severity: temporary or permanent error log * @drv_log: driver log to be combined with retrieved error log * @len: length of driver log * * Retrieve the temporary or permanent error from the PE. * Actually, the error will be retrieved through the dedicated * RTAS call. */ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len) { struct eeh_dev *edev; int config_addr; unsigned long flags; int ret; edev = of_node_to_eeh_dev(dn); spin_lock_irqsave(&slot_errbuf_lock, flags); memset(slot_errbuf, 0, eeh_error_buf_size); /* Figure out the PE address */ config_addr = edev->config_addr; if (edev->pe_config_addr) config_addr = edev->pe_config_addr; ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), virt_to_phys(drv_log), len, virt_to_phys(slot_errbuf), eeh_error_buf_size, severity); if (!ret) log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); spin_unlock_irqrestore(&slot_errbuf_lock, flags); return ret; }
/** * pseries_eeh_reset - Reset the specified PE * @dn: PE associated device node * @option: reset option * * Reset the specified PE */ static int pseries_eeh_reset(struct device_node *dn, int option) { struct eeh_dev *edev; int config_addr; int ret; /* Figure out PE address */ edev = of_node_to_eeh_dev(dn); config_addr = edev->config_addr; if (edev->pe_config_addr) config_addr = edev->pe_config_addr; /* Reset PE through RTAS call */ ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), option); /* If fundamental-reset not supported, try hot-reset */ if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), EEH_RESET_HOT); } return ret; }
static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) { struct device_node *dn; int cnt, rc; cnt = edev->freeze_count; if (bus) pcibios_remove_pci_devices(bus); rc = eeh_reset_pe(edev); if (rc) return rc; dn = eeh_dev_to_of_node(edev); if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) dn = dn->parent->child; while (dn) { struct eeh_dev *pedev = of_node_to_eeh_dev(dn); if (edev->pe_config_addr == pedev->pe_config_addr) { eeh_ops->configure_bridge(dn); eeh_restore_bars(pedev); } dn = dn->sibling; } if (bus) { ssleep(5); pcibios_add_pci_devices(bus); } edev->freeze_count = cnt; return 0; }
/** * pseries_eeh_get_pe_addr - Retrieve PE address * @dn: device node * * Retrieve the assocated PE address. Actually, there're 2 RTAS * function calls dedicated for the purpose. We need implement * it through the new function and then the old one. Besides, * you should make sure the config address is figured out from * FDT node before calling the function. * * It's notable that zero'ed return value means invalid PE config * address. */ static int pseries_eeh_get_pe_addr(struct device_node *dn) { struct eeh_dev *edev; int ret = 0; int rets[3]; edev = of_node_to_eeh_dev(dn); if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* * First of all, we need to make sure there has one PE * associated with the device. Otherwise, PE address is * meaningless. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, edev->config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), 1); if (ret || (rets[0] == 0)) return 0; /* Retrieve the associated PE config address */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, edev->config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), 0); if (ret) { pr_warning("%s: Failed to get PE address for %s\n", __func__, dn->full_name); return 0; } return rets[0]; } if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, edev->config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), 0); if (ret) { pr_warning("%s: Failed to get PE address for %s\n", __func__, dn->full_name); return 0; } return rets[0]; } return ret; }
/** * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable * @dn: device node * @option: operation to be issued * * The function is used to control the EEH functionality globally. * Currently, following options are support according to PAPR: * Enable EEH, Disable EEH, Enable MMIO and Enable DMA */ static int pseries_eeh_set_option(struct device_node *dn, int option) { int ret = 0; struct eeh_dev *edev; const u32 *reg; int config_addr; edev = of_node_to_eeh_dev(dn); /* * When we're enabling or disabling EEH functioality on * the particular PE, the PE config address is possibly * unavailable. Therefore, we have to figure it out from * the FDT node. */ switch (option) { case EEH_OPT_DISABLE: case EEH_OPT_ENABLE: reg = of_get_property(dn, "reg", NULL); config_addr = reg[0]; break; case EEH_OPT_THAW_MMIO: case EEH_OPT_THAW_DMA: config_addr = edev->config_addr; if (edev->pe_config_addr) config_addr = edev->pe_config_addr; break; default: pr_err("%s: Invalid option %d\n", __func__, option); return -EINVAL; } ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), option); return ret; }
/** * pseries_eeh_of_probe - EEH probe on the given device * @dn: OF node * @flag: Unused * * When EEH module is installed during system boot, all PCI devices * are checked one by one to see if it supports EEH. The function * is introduced for the purpose. */ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) { struct eeh_dev *edev; struct eeh_pe pe; const u32 *class_code, *vendor_id, *device_id; const u32 *regs; int enable = 0; int ret; /* Retrieve OF node and eeh device */ edev = of_node_to_eeh_dev(dn); if (!of_device_is_available(dn)) return NULL; /* Retrieve class/vendor/device IDs */ class_code = of_get_property(dn, "class-code", NULL); vendor_id = of_get_property(dn, "vendor-id", NULL); device_id = of_get_property(dn, "device-id", NULL); /* Skip for bad OF node or PCI-ISA bridge */ if (!class_code || !vendor_id || !device_id) return NULL; if (dn->type && !strcmp(dn->type, "isa")) return NULL; /* Update class code and mode of eeh device */ edev->class_code = *class_code; edev->mode = 0; /* Retrieve the device address */ regs = of_get_property(dn, "reg", NULL); if (!regs) { pr_warning("%s: OF node property %s::reg not found\n", __func__, dn->full_name); return NULL; } /* Initialize the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = edev->phb; pe.config_addr = regs[0]; /* Enable EEH on the device */ ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (!ret) { edev->config_addr = regs[0]; /* Retrieve PE address */ edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); pe.addr = edev->pe_config_addr; /* Some older systems (Power4) allow the ibm,set-eeh-option * call to succeed even on nodes where EEH is not supported. * Verify support explicitly. */ ret = eeh_ops->get_state(&pe, NULL); if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) enable = 1; if (enable) { eeh_subsystem_enabled = 1; eeh_add_to_parent_pe(edev); pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", __func__, dn->full_name, pe.phb->global_number, pe.addr, pe.config_addr); } else if (dn->parent && of_node_to_eeh_dev(dn->parent) && (of_node_to_eeh_dev(dn->parent))->pe) { /* This device doesn't support EEH, but it may have an * EEH parent, in which case we mark it as supported. */ edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; eeh_add_to_parent_pe(edev); } } /* Save memory bars */ eeh_save_bars(edev); return NULL; }
struct eeh_dev *handle_eeh_events(struct eeh_event *event) { struct device_node *frozen_dn; struct eeh_dev *frozen_edev; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev)); if (!frozen_dn) { location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL); location = location ? location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev))); return NULL; } frozen_bus = pcibios_find_pci_bus(frozen_dn); location = of_get_property(frozen_dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; if (!frozen_bus) frozen_bus = pcibios_find_pci_bus(frozen_dn->parent); if (!frozen_bus) { printk(KERN_ERR "EEH: Cannot find PCI bus " "for location=%s dn=%s\n", location, frozen_dn->full_name); return NULL; } frozen_edev = of_node_to_eeh_dev(frozen_dn); frozen_edev->freeze_count++; pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev)); drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev)); if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", frozen_edev->freeze_count); if (frozen_edev->pdev) { bus_pci_str = pci_name(frozen_edev->pdev); bus_drv_str = eeh_pcid_name(frozen_edev->pdev); printk(KERN_WARNING "EEH: Bus location=%s driver=%s pci addr=%s\n", location, bus_drv_str, bus_pci_str); } printk(KERN_WARNING "EEH: Device location=%s driver=%s pci addr=%s\n", location, drv_str, pci_str); pci_walk_bus(frozen_bus, eeh_report_error, &result); rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; } eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP); if (result == PCI_ERS_RESULT_NONE) { rc = eeh_reset_device(frozen_edev, frozen_bus); if (rc) { printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); goto hard_fail; } } if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO); if (rc < 0) goto hard_fail; if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); } } if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA); if (rc < 0) goto hard_fail; if (rc) result = PCI_ERS_RESULT_NEED_RESET; else result = PCI_ERS_RESULT_RECOVERED; } if (result == PCI_ERS_RESULT_DISCONNECT) { printk(KERN_WARNING "EEH: Device driver gave up\n"); goto hard_fail; } if (result == PCI_ERS_RESULT_NEED_RESET) { rc = eeh_reset_device(frozen_edev, NULL); if (rc) { printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); goto hard_fail; } result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_reset, &result); } if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { printk(KERN_WARNING "EEH: Not recovered\n"); goto hard_fail; } pci_walk_bus(frozen_bus, eeh_report_resume, NULL); return frozen_edev; excess_failures: printk(KERN_ERR "EEH: PCI device at location=%s driver=%s pci addr=%s\n" "has failed %d times in the last hour " "and has been permanently disabled.\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str, frozen_edev->freeze_count); goto perm_error; hard_fail: printk(KERN_ERR "EEH: Unable to recover from failure of PCI device " "at location=%s driver=%s pci addr=%s\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str); perm_error: eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM); pci_walk_bus(frozen_bus, eeh_report_failure, NULL); pcibios_remove_pci_devices(frozen_bus); return NULL; }
/** * pseries_eeh_get_state - Retrieve PE state * @dn: PE associated device node * @state: return value * * Retrieve the state of the specified PE. On RTAS compliant * pseries platform, there already has one dedicated RTAS function * for the purpose. It's notable that the associated PE config address * might be ready when calling the function. Therefore, endeavour to * use the PE config address if possible. Further more, there're 2 * RTAS calls for the purpose, we need to try the new one and back * to the old one if the new one couldn't work properly. */ static int pseries_eeh_get_state(struct device_node *dn, int *state) { struct eeh_dev *edev; int config_addr; int ret; int rets[4]; int result; /* Figure out PE config address if possible */ edev = of_node_to_eeh_dev(dn); config_addr = edev->config_addr; if (edev->pe_config_addr) config_addr = edev->pe_config_addr; if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid)); } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { /* Fake PE unavailable info */ rets[2] = 0; ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, config_addr, BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid)); } else { return EEH_STATE_NOT_SUPPORT; } if (ret) return ret; /* Parse the result out */ result = 0; if (rets[1]) { switch(rets[0]) { case 0: result &= ~EEH_STATE_RESET_ACTIVE; result |= EEH_STATE_MMIO_ACTIVE; result |= EEH_STATE_DMA_ACTIVE; break; case 1: result |= EEH_STATE_RESET_ACTIVE; result |= EEH_STATE_MMIO_ACTIVE; result |= EEH_STATE_DMA_ACTIVE; break; case 2: result &= ~EEH_STATE_RESET_ACTIVE; result &= ~EEH_STATE_MMIO_ACTIVE; result &= ~EEH_STATE_DMA_ACTIVE; break; case 4: result &= ~EEH_STATE_RESET_ACTIVE; result &= ~EEH_STATE_MMIO_ACTIVE; result &= ~EEH_STATE_DMA_ACTIVE; result |= EEH_STATE_MMIO_ENABLED; break; case 5: if (rets[2]) { if (state) *state = rets[2]; result = EEH_STATE_UNAVAILABLE; } else { result = EEH_STATE_NOT_SUPPORT; } default: result = EEH_STATE_NOT_SUPPORT; } } else { result = EEH_STATE_NOT_SUPPORT; } return result; }
/** * eeh_handle_event - Reset a PCI device after hard lockup. * @event: EEH event * * While PHB detects address or data parity errors on particular PCI * slot, the associated PE will be frozen. Besides, DMA's occurring * to wild addresses (which usually happen due to bugs in device * drivers or in PCI adapter firmware) can cause EEH error. #SERR, * #PERR or other misc PCI-related errors also can trigger EEH errors. * * Recovery process consists of unplugging the device driver (which * generated hotplug events to userspace), then issuing a PCI #RST to * the device, then reconfiguring the PCI config space for all bridges * & devices under this slot, and then finally restarting the device * drivers (which cause a second set of hotplug events to go out to * userspace). */ struct eeh_dev *handle_eeh_events(struct eeh_event *event) { struct device_node *frozen_dn; struct eeh_dev *frozen_edev; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev)); if (!frozen_dn) { location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL); location = location ? location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev))); return NULL; } frozen_bus = pcibios_find_pci_bus(frozen_dn); location = of_get_property(frozen_dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; /* There are two different styles for coming up with the PE. * In the old style, it was the highest EEH-capable device * which was always an EADS pci bridge. In the new style, * there might not be any EADS bridges, and even when there are, * the firmware marks them as "EEH incapable". So another * two-step is needed to find the pci bus.. */ if (!frozen_bus) frozen_bus = pcibios_find_pci_bus(frozen_dn->parent); if (!frozen_bus) { printk(KERN_ERR "EEH: Cannot find PCI bus " "for location=%s dn=%s\n", location, frozen_dn->full_name); return NULL; } frozen_edev = of_node_to_eeh_dev(frozen_dn); frozen_edev->freeze_count++; pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev)); drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev)); if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", frozen_edev->freeze_count); if (frozen_edev->pdev) { bus_pci_str = pci_name(frozen_edev->pdev); bus_drv_str = eeh_pcid_name(frozen_edev->pdev); printk(KERN_WARNING "EEH: Bus location=%s driver=%s pci addr=%s\n", location, bus_drv_str, bus_pci_str); } printk(KERN_WARNING "EEH: Device location=%s driver=%s pci addr=%s\n", location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs * to accomplish the reset. Each child gets a report of the * status ... if any child can't handle the reset, then the entire * slot is dlpar removed and added. */ pci_walk_bus(frozen_bus, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, * sometimes over 3 seconds for certain systems. */ rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; } /* Since rtas may enable MMIO when posting the error log, * don't post the error log until after all dev drivers * have been informed. */ eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they * go down willingly, without panicing the system. */ if (result == PCI_ERS_RESULT_NONE) { rc = eeh_reset_device(frozen_edev, frozen_bus); if (rc) { printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); goto hard_fail; } } /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO); if (rc < 0) goto hard_fail; if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); } } /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA); if (rc < 0) goto hard_fail; if (rc) result = PCI_ERS_RESULT_NEED_RESET; else result = PCI_ERS_RESULT_RECOVERED; } /* If any device has a hard failure, then shut off everything. */ if (result == PCI_ERS_RESULT_DISCONNECT) { printk(KERN_WARNING "EEH: Device driver gave up\n"); goto hard_fail; } /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { rc = eeh_reset_device(frozen_edev, NULL); if (rc) { printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); goto hard_fail; } result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_reset, &result); } /* All devices should claim they have recovered by now. */ if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { printk(KERN_WARNING "EEH: Not recovered\n"); goto hard_fail; } /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); return frozen_edev; excess_failures: /* * About 90% of all real-life EEH failures in the field * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ printk(KERN_ERR "EEH: PCI device at location=%s driver=%s pci addr=%s\n" "has failed %d times in the last hour " "and has been permanently disabled.\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str, frozen_edev->freeze_count); goto perm_error; hard_fail: printk(KERN_ERR "EEH: Unable to recover from failure of PCI device " "at location=%s driver=%s pci addr=%s\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str); perm_error: eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ pci_walk_bus(frozen_bus, eeh_report_failure, NULL); /* Shut down the device drivers for good. */ pcibios_remove_pci_devices(frozen_bus); return NULL; }