static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, enum pci_channel_state state, char *error_mesg, int (*cb)(struct pci_dev *, void *)) { struct aer_broadcast_data result_data; dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg); result_data.state = state; if (cb == report_error_detected) result_data.result = PCI_ERS_RESULT_CAN_RECOVER; else result_data.result = PCI_ERS_RESULT_RECOVERED; if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) { if (cb == report_error_detected) dev->error_state = state; pci_walk_bus(dev->subordinate, cb, &result_data); if (cb == report_resume) { pci_cleanup_aer_uncorrect_error_status(dev); dev->error_state = pci_channel_io_normal; } } else { pci_walk_bus(dev->bus, cb, &result_data); } return result_data.result; }
static void via_no_dac(struct pci_dev *dev) { if (!disable_dac_quirk) { dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); pci_walk_bus(dev->subordinate, via_no_dac_cb, NULL); } }
static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *info) { struct pci_dev *pdev, *alias_dev; struct msi_domain_info *msi_info; int alias_count = 0; if (!dev_is_pci(dev)) return -EINVAL; msi_info = msi_get_domain_info(domain->parent); pdev = to_pci_dev(dev); /* * If pdev is downstream of any aliasing bridges, take an upper * bound of how many other vectors could map to the same DevID. */ pci_for_each_dma_alias(pdev, its_get_pci_alias, &alias_dev); if (alias_dev != pdev && alias_dev->subordinate) pci_walk_bus(alias_dev->subordinate, its_pci_msi_vec_count, &alias_count); /* ITS specific DeviceID, as the core ITS ignores dev. */ info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain, pdev); return msi_info->ops->msi_prepare(domain->parent, dev, max(nvec, alias_count), info); }
/** * find_source_device - search through device hierarchy for source device * @parent: pointer to Root Port pci_dev data structure * @e_info: including detailed error information such like id * * Return true if found. * * Invoked by DPC when error is detected at the Root Port. * Caller of this function must set id, severity, and multi_error_valid of * struct aer_err_info pointed by @e_info properly. This function must fill * e_info->error_dev_num and e_info->dev[], based on the given information. */ static bool find_source_device(struct pci_dev *parent, struct aer_err_info *e_info) { struct pci_dev *dev = parent; int result; /* Must reset in this function */ e_info->error_dev_num = 0; /* Is Root Port an agent that sends error message? */ result = find_device_iter(dev, e_info); if (result) return true; pci_walk_bus(parent->subordinate, find_device_iter, e_info); if (!e_info->error_dev_num) { #ifdef CONFIG_DEBUG_PRINTK dev_printk(KERN_DEBUG, &parent->dev, "can't find device of ID%04x\n", e_info->id); #else dev_; #endif return false; } return true; }
static void set_downstream_devices_error_reporting(struct pci_dev *dev, bool enable) { set_device_error_reporting(dev, &enable); if (!dev->subordinate) return; pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); }
/** * broadcast_error_message - handle message broadcast to downstream drivers * @dev: pointer to from where in a hierarchy message is broadcasted down * @state: error state * @error_mesg: message to print * @cb: callback to be broadcasted * * Invoked during error recovery process. Once being invoked, the content * of error severity will be broadcasted to all downstream drivers in a * hierarchy in question. */ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, enum pci_channel_state state, char *error_mesg, int (*cb)(struct pci_dev *, void *)) { struct aer_broadcast_data result_data; #ifdef CONFIG_DEBUG_PRINTK dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg); #else dev_; #endif result_data.state = state; if (cb == report_error_detected) result_data.result = PCI_ERS_RESULT_CAN_RECOVER; else result_data.result = PCI_ERS_RESULT_RECOVERED; if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) { /* * If the error is reported by a bridge, we think this error * is related to the downstream link of the bridge, so we * do error recovery on all subordinates of the bridge instead * of the bridge and clear the error status of the bridge. */ if (cb == report_error_detected) dev->error_state = state; pci_walk_bus(dev->subordinate, cb, &result_data); if (cb == report_resume) { pci_cleanup_aer_uncorrect_error_status(dev); dev->error_state = pci_channel_io_normal; } } else { /* * If the error is reported by an end point, we think this * error is related to the upstream link of the end point. */ pci_walk_bus(dev->bus, cb, &result_data); } return result_data.result; }
/** * find_source_device - search through device hierarchy for source device * @parent: pointer to Root Port pci_dev data structure * @err_info: including detailed error information such like id * * Invoked when error is detected at the Root Port. */ static void find_source_device(struct pci_dev *parent, struct aer_err_info *e_info) { struct pci_dev *dev = parent; int result; /* Is Root Port an agent that sends error message? */ result = find_device_iter(dev, e_info); if (result) return; pci_walk_bus(parent->subordinate, find_device_iter, e_info); }
static void find_source_device(struct pci_dev *parent, struct aer_err_info *e_info) { struct pci_dev *dev = parent; int result; result = find_device_iter(dev, e_info); if (result) return; pci_walk_bus(parent->subordinate, find_device_iter, e_info); }
struct pci_dn * handle_eeh_events (struct eeh_event *event) { struct device_node *frozen_dn; struct pci_dn *frozen_pdn; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; const char *location, *pci_str, *drv_str; frozen_dn = find_device_pe(event->dn); if (!frozen_dn) { location = of_get_property(event->dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", location, pci_name(event->dev)); return NULL; } frozen_bus = pcibios_find_pci_bus(frozen_dn); location = of_get_property(frozen_dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; /* There are two different styles for coming up with the PE. * In the old style, it was the highest EEH-capable device * which was always an EADS pci bridge. In the new style, * there might not be any EADS bridges, and even when there are, * the firmware marks them as "EEH incapable". So another * two-step is needed to find the pci bus.. */ if (!frozen_bus) frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); if (!frozen_bus) { printk(KERN_ERR "EEH: Cannot find PCI bus " "for location=%s dn=%s\n", location, frozen_dn->full_name); return NULL; } frozen_pdn = PCI_DN(frozen_dn); frozen_pdn->eeh_freeze_count++; if (frozen_pdn->pcidev) { pci_str = pci_name (frozen_pdn->pcidev); drv_str = pcid_name (frozen_pdn->pcidev); } else { pci_str = pci_name (event->dev); drv_str = pcid_name (event->dev); } if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", frozen_pdn->eeh_freeze_count); printk(KERN_WARNING "EEH: location=%s driver=%s pci addr=%s\n", location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs * to accomplish the reset. Each child gets a report of the * status ... if any child can't handle the reset, then the entire * slot is dlpar removed and added. */ pci_walk_bus(frozen_bus, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, * sometimes over 3 seconds for certain systems. */ rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; } /* Since rtas may enable MMIO when posting the error log, * don't post the error log until after all dev drivers * have been informed. */ eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they * go down willingly, without panicing the system. */ if (result == PCI_ERS_RESULT_NONE) { rc = eeh_reset_device(frozen_pdn, frozen_bus); if (rc) { printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); goto hard_fail; } } /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO); if (rc < 0) goto hard_fail; if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); } } /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA); if (rc < 0) goto hard_fail; if (rc) result = PCI_ERS_RESULT_NEED_RESET; else result = PCI_ERS_RESULT_RECOVERED; } /* If any device has a hard failure, then shut off everything. */ if (result == PCI_ERS_RESULT_DISCONNECT) { printk(KERN_WARNING "EEH: Device driver gave up\n"); goto hard_fail; } /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { rc = eeh_reset_device(frozen_pdn, NULL); if (rc) { printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); goto hard_fail; } result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_reset, &result); } /* All devices should claim they have recovered by now. */ if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { printk(KERN_WARNING "EEH: Not recovered\n"); goto hard_fail; } /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); return frozen_pdn; excess_failures: /* * About 90% of all real-life EEH failures in the field * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ printk(KERN_ERR "EEH: PCI device at location=%s driver=%s pci addr=%s \n" "has failed %d times in the last hour " "and has been permanently disabled. \n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); goto perm_error; hard_fail: printk(KERN_ERR "EEH: Unable to recover from failure of PCI device " "at location=%s driver=%s pci addr=%s \n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str); perm_error: eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE); /* Notify all devices that they're about to go down. */ pci_walk_bus(frozen_bus, eeh_report_failure, NULL); /* Shut down the device drivers for good. */ pcibios_remove_pci_devices(frozen_bus); return NULL; }
struct pci_dn * handle_eeh_events (struct eeh_event *event) { struct device_node *frozen_dn; struct pci_dn *frozen_pdn; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; const char *location, *pci_str, *drv_str; frozen_dn = find_device_pe(event->dn); frozen_bus = pcibios_find_pci_bus(frozen_dn); if (!frozen_dn) { location = (char *) get_property(event->dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", location, pci_name(event->dev)); return NULL; } location = (char *) get_property(frozen_dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; /* There are two different styles for coming up with the PE. * In the old style, it was the highest EEH-capable device * which was always an EADS pci bridge. In the new style, * there might not be any EADS bridges, and even when there are, * the firmware marks them as "EEH incapable". So another * two-step is needed to find the pci bus.. */ if (!frozen_bus) frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); if (!frozen_bus) { printk(KERN_ERR "EEH: Cannot find PCI bus " "for location=%s dn=%s\n", location, frozen_dn->full_name); return NULL; } #if 0 /* We may get "permanent failure" messages on empty slots. * These are false alarms. Empty slots have no child dn. */ if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL)) return; #endif frozen_pdn = PCI_DN(frozen_dn); frozen_pdn->eeh_freeze_count++; if (frozen_pdn->pcidev) { pci_str = pci_name (frozen_pdn->pcidev); drv_str = pcid_name (frozen_pdn->pcidev); } else { pci_str = pci_name (event->dev); drv_str = pcid_name (event->dev); } if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; /* If the reset state is a '5' and the time to reset is 0 (infinity) * or is more then 15 seconds, then mark this as a permanent failure. */ if ((event->state == pci_channel_io_perm_failure) && ((event->time_unavail <= 0) || (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000))) goto hard_fail; eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); printk(KERN_WARNING "EEH: This PCI device has failed %d times since last reboot: " "location=%s driver=%s pci addr=%s\n", frozen_pdn->eeh_freeze_count, location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs * to accomplish the reset. Each child gets a report of the * status ... if any child can't handle the reset, then the entire * slot is dlpar removed and added. */ pci_walk_bus(frozen_bus, eeh_report_error, &result); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they * go down willingly, without panicing the system. */ if (result == PCI_ERS_RESULT_NONE) { rc = eeh_reset_device(frozen_pdn, frozen_bus); if (rc) goto hard_fail; } /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { rc = eeh_reset_device(frozen_pdn, NULL); if (rc) goto hard_fail; pci_walk_bus(frozen_bus, eeh_report_reset, NULL); } /* If all devices reported they can proceed, the re-enable PIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { /* XXX Not supported; we brute-force reset the device */ rc = eeh_reset_device(frozen_pdn, NULL); if (rc) goto hard_fail; pci_walk_bus(frozen_bus, eeh_report_reset, NULL); } /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); return frozen_pdn; excess_failures: /* * About 90% of all real-life EEH failures in the field * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ printk(KERN_ERR "EEH: PCI device at location=%s driver=%s pci addr=%s \n" "has failed %d times and has been permanently disabled. \n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); goto perm_error; hard_fail: printk(KERN_ERR "EEH: Unable to recover from failure of PCI device " "at location=%s driver=%s pci addr=%s \n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str); perm_error: eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); /* Notify all devices that they're about to go down. */ pci_walk_bus(frozen_bus, eeh_report_failure, NULL); /* Shut down the device drivers for good. */ pcibios_remove_pci_devices(frozen_bus); return NULL; }
struct eeh_dev *handle_eeh_events(struct eeh_event *event) { struct device_node *frozen_dn; struct eeh_dev *frozen_edev; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev)); if (!frozen_dn) { location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL); location = location ? location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev))); return NULL; } frozen_bus = pcibios_find_pci_bus(frozen_dn); location = of_get_property(frozen_dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; if (!frozen_bus) frozen_bus = pcibios_find_pci_bus(frozen_dn->parent); if (!frozen_bus) { printk(KERN_ERR "EEH: Cannot find PCI bus " "for location=%s dn=%s\n", location, frozen_dn->full_name); return NULL; } frozen_edev = of_node_to_eeh_dev(frozen_dn); frozen_edev->freeze_count++; pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev)); drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev)); if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", frozen_edev->freeze_count); if (frozen_edev->pdev) { bus_pci_str = pci_name(frozen_edev->pdev); bus_drv_str = eeh_pcid_name(frozen_edev->pdev); printk(KERN_WARNING "EEH: Bus location=%s driver=%s pci addr=%s\n", location, bus_drv_str, bus_pci_str); } printk(KERN_WARNING "EEH: Device location=%s driver=%s pci addr=%s\n", location, drv_str, pci_str); pci_walk_bus(frozen_bus, eeh_report_error, &result); rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; } eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP); if (result == PCI_ERS_RESULT_NONE) { rc = eeh_reset_device(frozen_edev, frozen_bus); if (rc) { printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); goto hard_fail; } } if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO); if (rc < 0) goto hard_fail; if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); } } if (result == PCI_ERS_RESULT_CAN_RECOVER) { rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA); if (rc < 0) goto hard_fail; if (rc) result = PCI_ERS_RESULT_NEED_RESET; else result = PCI_ERS_RESULT_RECOVERED; } if (result == PCI_ERS_RESULT_DISCONNECT) { printk(KERN_WARNING "EEH: Device driver gave up\n"); goto hard_fail; } if (result == PCI_ERS_RESULT_NEED_RESET) { rc = eeh_reset_device(frozen_edev, NULL); if (rc) { printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); goto hard_fail; } result = PCI_ERS_RESULT_NONE; pci_walk_bus(frozen_bus, eeh_report_reset, &result); } if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { printk(KERN_WARNING "EEH: Not recovered\n"); goto hard_fail; } pci_walk_bus(frozen_bus, eeh_report_resume, NULL); return frozen_edev; excess_failures: printk(KERN_ERR "EEH: PCI device at location=%s driver=%s pci addr=%s\n" "has failed %d times in the last hour " "and has been permanently disabled.\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str, frozen_edev->freeze_count); goto perm_error; hard_fail: printk(KERN_ERR "EEH: Unable to recover from failure of PCI device " "at location=%s driver=%s pci addr=%s\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str); perm_error: eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM); pci_walk_bus(frozen_bus, eeh_report_failure, NULL); pcibios_remove_pci_devices(frozen_bus); return NULL; }