Exemplo n.º 1
0
/** 
 * Return the "partitionable endpoint" (pe) under which this device lies
 */
struct device_node * find_device_pe(struct device_node *dn)
{
	while ((dn->parent) && PCI_DN(dn->parent) &&
	      (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
		dn = dn->parent;
	}
	return dn;
}
Exemplo n.º 2
0
/**
 * eeh_enable_irq - enable interrupt for the recovering device
 */
static void eeh_enable_irq(struct pci_dev *dev)
{
	struct device_node *dn = pci_device_to_OF_node(dev);

	if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
		enable_irq(dev->irq);
	}
}
Exemplo n.º 3
0
static void __eeh_clear_slot (struct device_node *dn, int mode_flag)
{
	while (dn) {
		if (PCI_DN(dn)) {
			PCI_DN(dn)->eeh_mode &= ~mode_flag;
			PCI_DN(dn)->eeh_check_count = 0;
			if (dn->child)
				__eeh_clear_slot (dn->child, mode_flag);
		}
		dn = dn->sibling;
	}
}
Exemplo n.º 4
0
void eeh_clear_slot (struct device_node *dn, int mode_flag)
{
	unsigned long flags;
	spin_lock_irqsave(&confirm_error_lock, flags);
	
	dn = find_device_pe (dn);
	
	/* Back up one, since config addrs might be shared */
	if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode &= ~mode_flag;
	PCI_DN(dn)->eeh_check_count = 0;
	__eeh_clear_slot (dn->child, mode_flag);
	spin_unlock_irqrestore(&confirm_error_lock, flags);
}
Exemplo n.º 5
0
static void eeh_report_error(struct pci_dev *dev, void *userdata)
{
	enum pci_ers_result rc, *res = userdata;
	struct pci_driver *driver = dev->driver;

	dev->error_state = pci_channel_io_frozen;

	if (!driver)
		return;

	if (irq_in_use (dev->irq)) {
		struct device_node *dn = pci_device_to_OF_node(dev);
		PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
		disable_irq_nosync(dev->irq);
	}
	if (!driver->err_handler ||
	    !driver->err_handler->error_detected)
		return;

	rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);

	/* A driver that needs a reset trumps all others */
	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
}
Exemplo n.º 6
0
static void iommu_bus_setup_pSeries(struct pci_bus *bus)
{
	struct device_node *dn;
	struct iommu_table *tbl;
	struct device_node *isa_dn, *isa_dn_orig;
	struct device_node *tmp;
	struct pci_dn *pci;
	int children;

	DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self);

	dn = pci_bus_to_OF_node(bus);
	pci = PCI_DN(dn);

	if (bus->self) {
		/* This is not a root bus, any setup will be done for the
		 * device-side of the bridge in iommu_dev_setup_pSeries().
		 */
		return;
	}

	/* Check if the ISA bus on the system is under
	 * this PHB.
	 */
	isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");

	while (isa_dn && isa_dn != dn)
		isa_dn = isa_dn->parent;

	if (isa_dn_orig)
		of_node_put(isa_dn_orig);

	/* Count number of direct PCI children of the PHB.
	 * All PCI device nodes have class-code property, so it's
	 * an easy way to find them.
	 */
	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
		if (get_property(tmp, "class-code", NULL))
			children++;

	DBG("Children: %d\n", children);

	/* Calculate amount of DMA window per slot. Each window must be
	 * a power of two (due to pci_alloc_consistent requirements).
	 *
	 * Keep 256MB aside for PHBs with ISA.
	 */

	if (!isa_dn) {
		/* No ISA/IDE - just set window size and return */
		pci->phb->dma_window_size = 0x80000000ul; /* To be divided */

		while (pci->phb->dma_window_size * children > 0x80000000ul)
			pci->phb->dma_window_size >>= 1;
		DBG("No ISA/IDE, window size is 0x%lx\n",
			pci->phb->dma_window_size);
		pci->phb->dma_window_base_cur = 0;

		return;
	}
Exemplo n.º 7
0
static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
{
	struct device_node *dn;
	int cnt, rc;

	/* pcibios will clear the counter; save the value */
	cnt = pe_dn->eeh_freeze_count;

	if (bus)
		pcibios_remove_pci_devices(bus);

	/* Reset the pci controller. (Asserts RST#; resets config space).
	 * Reconfigure bridges and devices. Don't try to bring the system
	 * up if the reset failed for some reason. */
	rc = rtas_set_slot_reset(pe_dn);
	if (rc)
		return rc;

	/* Walk over all functions on this device.  */
	dn = pe_dn->node;
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent->child;

	while (dn) {
		struct pci_dn *ppe = PCI_DN(dn);
		/* On Power4, always true because eeh_pe_config_addr=0 */
		if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
			rtas_configure_bridge(ppe);
			eeh_restore_bars(ppe);
 		}
		dn = dn->sibling;
	}

	/* Give the system 5 seconds to finish running the user-space
	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes, 
	 * this is a hack, but if we don't do this, and try to bring 
	 * the device up before the scripts have taken it down, 
	 * potentially weird things happen.
	 */
	if (bus) {
		ssleep (5);
		pcibios_add_pci_devices(bus);
	}
	pe_dn->eeh_freeze_count = cnt;

	return 0;
}
Exemplo n.º 8
0
static int of_pci_phb_probe(struct platform_device *dev)
{
	struct pci_controller *phb;

	/* Check if we can do that ... */
	if (ppc_md.pci_setup_phb == NULL)
		return -ENODEV;

	pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);

	/* Alloc and setup PHB data structure */
	phb = pcibios_alloc_controller(dev->dev.of_node);
	if (!phb)
		return -ENODEV;

	/* Setup parent in sysfs */
	phb->parent = &dev->dev;

	/* Setup the PHB using arch provided callback */
	if (ppc_md.pci_setup_phb(phb)) {
		pcibios_free_controller(phb);
		return -ENODEV;
	}

	/* Process "ranges" property */
	pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0);

	/* Init pci_dn data structures */
	pci_devs_phb_init_dynamic(phb);

	/* Create EEH devices for the PHB */
	eeh_dev_phb_init_dynamic(phb);

	/* Register devices with EEH */
	if (dev->dev.of_node->child)
		eeh_add_device_tree_early(PCI_DN(dev->dev.of_node));

	/* Scan the bus */
	pcibios_scan_phb(phb);
	if (phb->bus == NULL)
		return -ENXIO;

	/* Claim resources. This might need some rework as well depending
	 * whether we are doing probe-only or not, like assigning unassigned
	 * resources etc...
	 */
	pcibios_claim_one_bus(phb->bus);

	/* Finish EEH setup */
	eeh_add_device_tree_late(phb->bus);

	/* Add probed PCI devices to the device model */
	pci_bus_add_devices(phb->bus);

	/* sysfs files should only be added after devices are added */
	eeh_add_sysfs_files(phb->bus);

	return 0;
}
Exemplo n.º 9
0
/*
 * Generate a Direct Select Address for the Hypervisor
 */
static inline u64 iseries_ds_addr(struct device_node *node)
{
	struct pci_dn *pdn = PCI_DN(node);
	const u32 *sbp = of_get_property(node, "linux,subbus", NULL);

	return ((u64)pdn->busno << 48) + ((u64)(sbp ? *sbp : 0) << 40)
			+ ((u64)0x10 << 32);
}
/**
 * pseries_eeh_write_config - Write PCI config space
 * @dn: device node
 * @where: PCI address
 * @size: size to write
 * @val: value to be written
 *
 * Write config space to the specified device
 */
static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val)
{
	struct pci_dn *pdn;

	pdn = PCI_DN(dn);

	return rtas_write_config(pdn, where, size, val);
}
Exemplo n.º 11
0
void eeh_mark_slot (struct device_node *dn, int mode_flag)
{
	struct pci_dev *dev;
	dn = find_device_pe (dn);

	/* Back up one, since config addrs might be shared */
	if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode |= mode_flag;

	/* Mark the pci device too */
	dev = PCI_DN(dn)->pcidev;
	if (dev)
		dev->error_state = pci_channel_io_frozen;

	__eeh_mark_slot (dn->child, mode_flag);
}
Exemplo n.º 12
0
static void __eeh_mark_slot (struct device_node *dn, int mode_flag)
{
	while (dn) {
		if (PCI_DN(dn)) {
			/* Mark the pci device driver too */
			struct pci_dev *dev = PCI_DN(dn)->pcidev;

			PCI_DN(dn)->eeh_mode |= mode_flag;

			if (dev && dev->driver)
				dev->error_state = pci_channel_io_frozen;

			if (dn->child)
				__eeh_mark_slot (dn->child, mode_flag);
		}
		dn = dn->sibling;
	}
}
Exemplo n.º 13
0
static void eeh_report_reset(struct pci_dev *dev, void *userdata)
{
	struct pci_driver *driver = dev->driver;
	struct device_node *dn = pci_device_to_OF_node(dev);

	if (!driver)
		return;

	if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
		enable_irq(dev->irq);
	}
	if (!driver->err_handler)
		return;
	if (!driver->err_handler->slot_reset)
		return;

	driver->err_handler->slot_reset(dev);
}
Exemplo n.º 14
0
static void eeh_report_resume(struct pci_dev *dev, void *userdata)
{
	struct pci_driver *driver = dev->driver;
	struct device_node *dn = pci_device_to_OF_node(dev);

	dev->error_state = pci_channel_io_normal;

	if (!driver)
		return;

	if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
		enable_irq(dev->irq);
	}
	if (!driver->err_handler ||
	    !driver->err_handler->resume)
		return;

	driver->err_handler->resume(dev);
}
Exemplo n.º 15
0
static inline struct iommu_table *devnode_table(struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev) {
		pdev = ppc64_isabridge_dev;
		if (!pdev)
			return NULL;
	} else
		pdev = to_pci_dev(dev);

	return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
}
Exemplo n.º 16
0
static void eeh_report_reset(struct pci_dev *dev, void *userdata)
{
	enum pci_ers_result rc, *res = userdata;
	struct pci_driver *driver = dev->driver;
	struct device_node *dn = pci_device_to_OF_node(dev);

	if (!driver)
		return;

	if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
		enable_irq(dev->irq);
	}
	if (!driver->err_handler ||
	    !driver->err_handler->slot_reset)
		return;

	rc = driver->err_handler->slot_reset(dev);
	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
	if (*res == PCI_ERS_RESULT_DISCONNECT &&
	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
}
Exemplo n.º 17
0
/**
 * eeh_restore_bars - restore the PCI config space info
 *
 * This routine performs a recursive walk to the children
 * of this device as well.
 */
void eeh_restore_bars(struct pci_dn *pdn)
{
	struct device_node *dn;
	if (!pdn) 
		return;
	
	if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
		__restore_bars (pdn);

	dn = pdn->node->child;
	while (dn) {
		eeh_restore_bars (PCI_DN(dn));
		dn = dn->sibling;
	}
}
Exemplo n.º 18
0
static void print_device_node_tree (struct pci_dn *pdn, int dent)
{
	int i;
	if (!pdn) return;
	for (i=0;i<dent; i++)
		printk(" ");
	printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
		pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
		pdn->eeh_pe_config_addr, pdn->node->full_name);
	dent += 3;
	struct device_node *pc = pdn->node->child;
	while (pc) {
		print_device_node_tree(PCI_DN(pc), dent);
		pc = pc->sibling;
	}
}
Exemplo n.º 19
0
/*
 * iommu_table_setparms_lpar
 *
 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
 */
static void iommu_table_setparms_lpar(struct pci_controller *phb,
				      struct device_node *dn,
				      struct iommu_table *tbl,
				      unsigned char *dma_window)
{
	unsigned long offset, size;

	tbl->it_busno  = PCI_DN(dn)->bussubno;
	of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);

	tbl->it_base   = 0;
	tbl->it_blocksize  = 16;
	tbl->it_type = TCE_PCI;
	tbl->it_offset = offset >> PAGE_SHIFT;
	tbl->it_size = size >> PAGE_SHIFT;
}
Exemplo n.º 20
0
/**
 * eeh_disable_irq - disable interrupt for the recovering device
 */
static void eeh_disable_irq(struct pci_dev *dev)
{
	struct device_node *dn = pci_device_to_OF_node(dev);

	/* Don't disable MSI and MSI-X interrupts. They are
	 * effectively disabled by the DMA Stopped state
	 * when an EEH error occurs.
	*/
	if (dev->msi_enabled || dev->msix_enabled)
		return;

	if (!irq_has_action(dev->irq))
		return;

	PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
	disable_irq_nosync(dev->irq);
}
Exemplo n.º 21
0
static int rtas_pci_write_config(struct pci_bus *bus,
				 unsigned int devfn,
				 int where, int size, u32 val)
{
	struct device_node *busdn, *dn;

	busdn = pci_bus_to_OF_node(bus);

	/* Search only direct children of the bus */
	for (dn = busdn->child; dn; dn = dn->sibling) {
		struct pci_dn *pdn = PCI_DN(dn);
		if (pdn && pdn->devfn == devfn
		    && of_device_is_available(dn))
			return rtas_write_config(pdn, where, size, val);
	}
	return PCIBIOS_DEVICE_NOT_FOUND;
}
Exemplo n.º 22
0
/*
 * iommu_table_setparms_lpar
 *
 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
 *
 * ToDo: properly interpret the ibm,dma-window property.  The definition is:
 *	logical-bus-number	(1 word)
 *	phys-address		(#address-cells words)
 *	size			(#cell-size words)
 *
 * Currently we hard code these sizes (more or less).
 */
static void iommu_table_setparms_lpar(struct pci_controller *phb,
				      struct device_node *dn,
				      struct iommu_table *tbl,
				      unsigned int *dma_window)
{
	tbl->it_busno  = PCI_DN(dn)->bussubno;

	/* TODO: Parse field size properties properly. */
	tbl->it_size   = (((unsigned long)dma_window[4] << 32) |
			   (unsigned long)dma_window[5]) >> PAGE_SHIFT;
	tbl->it_offset = (((unsigned long)dma_window[2] << 32) |
			   (unsigned long)dma_window[3]) >> PAGE_SHIFT;
	tbl->it_base   = 0;
	tbl->it_index  = dma_window[0];
	tbl->it_blocksize  = 16;
	tbl->it_type = TCE_PCI;
}
Exemplo n.º 23
0
static void eeh_report_failure(struct pci_dev *dev, void *userdata)
{
	struct pci_driver *driver = dev->driver;

	dev->error_state = pci_channel_io_perm_failure;

	if (!driver)
		return;

	if (irq_in_use (dev->irq)) {
		struct device_node *dn = pci_device_to_OF_node(dev);
		PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
		disable_irq_nosync(dev->irq);
	}
	if (!driver->err_handler)
		return;
	if (!driver->err_handler->error_detected)
		return;
	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
}
Exemplo n.º 24
0
static int rtas_pci_read_config(struct pci_bus *bus,
				unsigned int devfn,
				int where, int size, u32 *val)
{
	struct device_node *busdn, *dn;

	if (bus->self)
		busdn = pci_device_to_OF_node(bus->self);
	else
		busdn = bus->sysdata;	/* must be a phb */

	/* Search only direct children of the bus */
	for (dn = busdn->child; dn; dn = dn->sibling) {
		struct pci_dn *pdn = PCI_DN(dn);
		if (pdn && pdn->devfn == devfn
		    && of_device_available(dn))
			return rtas_read_config(pdn, where, size, val);
	}

	return PCIBIOS_DEVICE_NOT_FOUND;
}
Exemplo n.º 25
0
/**
 * eeh_dev_init - Create EEH device according to OF node
 * @dn: device node
 * @data: PHB
 *
 * It will create EEH device according to the given OF node. The function
 * might be called by PCI emunation, DR, PHB hotplug.
 */
void *eeh_dev_init(struct device_node *dn, void *data)
{
	struct pci_controller *phb = data;
	struct eeh_dev *edev;

	/* Allocate EEH device */
	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
	if (!edev) {
		pr_warn("%s: out of memory\n",
			__func__);
		return NULL;
	}

	/* Associate EEH device with OF node */
	PCI_DN(dn)->edev = edev;
	edev->dn  = dn;
	edev->phb = phb;
	INIT_LIST_HEAD(&edev->list);

	return NULL;
}
Exemplo n.º 26
0
struct pci_dn * handle_eeh_events (struct eeh_event *event)
{
	struct device_node *frozen_dn;
	struct pci_dn *frozen_pdn;
	struct pci_bus *frozen_bus;
	int rc = 0;
	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
	const char *location, *pci_str, *drv_str;

	frozen_dn = find_device_pe(event->dn);
	if (!frozen_dn) {

		location = of_get_property(event->dn, "ibm,loc-code", NULL);
		location = location ? location : "unknown";
		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
		                "for location=%s pci addr=%s\n",
		        location, pci_name(event->dev));
		return NULL;
	}

	frozen_bus = pcibios_find_pci_bus(frozen_dn);
	location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
	location = location ? location : "unknown";

	/* There are two different styles for coming up with the PE.
	 * In the old style, it was the highest EEH-capable device
	 * which was always an EADS pci bridge.  In the new style,
	 * there might not be any EADS bridges, and even when there are,
	 * the firmware marks them as "EEH incapable". So another
	 * two-step is needed to find the pci bus.. */
	if (!frozen_bus)
		frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);

	if (!frozen_bus) {
		printk(KERN_ERR "EEH: Cannot find PCI bus "
		        "for location=%s dn=%s\n",
		        location, frozen_dn->full_name);
		return NULL;
	}

	frozen_pdn = PCI_DN(frozen_dn);
	frozen_pdn->eeh_freeze_count++;

	if (frozen_pdn->pcidev) {
		pci_str = pci_name (frozen_pdn->pcidev);
		drv_str = pcid_name (frozen_pdn->pcidev);
	} else {
		pci_str = pci_name (event->dev);
		drv_str = pcid_name (event->dev);
	}
	
	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
		goto excess_failures;

	printk(KERN_WARNING
	   "EEH: This PCI device has failed %d times in the last hour:\n",
		frozen_pdn->eeh_freeze_count);
	printk(KERN_WARNING
		"EEH: location=%s driver=%s pci addr=%s\n",
		location, drv_str, pci_str);

	/* Walk the various device drivers attached to this slot through
	 * a reset sequence, giving each an opportunity to do what it needs
	 * to accomplish the reset.  Each child gets a report of the
	 * status ... if any child can't handle the reset, then the entire
	 * slot is dlpar removed and added.
	 */
	pci_walk_bus(frozen_bus, eeh_report_error, &result);

	/* Get the current PCI slot state. This can take a long time,
	 * sometimes over 3 seconds for certain systems. */
	rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
	if (rc < 0) {
		printk(KERN_WARNING "EEH: Permanent failure\n");
		goto hard_fail;
	}

	/* Since rtas may enable MMIO when posting the error log,
	 * don't post the error log until after all dev drivers
	 * have been informed.
	 */
	eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE);

	/* If all device drivers were EEH-unaware, then shut
	 * down all of the device drivers, and hope they
	 * go down willingly, without panicing the system.
	 */
	if (result == PCI_ERS_RESULT_NONE) {
		rc = eeh_reset_device(frozen_pdn, frozen_bus);
		if (rc) {
			printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
			goto hard_fail;
		}
	}

	/* If all devices reported they can proceed, then re-enable MMIO */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);

		if (rc < 0)
			goto hard_fail;
		if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			result = PCI_ERS_RESULT_NONE;
			pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
		}
	}

	/* If all devices reported they can proceed, then re-enable DMA */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);

		if (rc < 0)
			goto hard_fail;
		if (rc)
			result = PCI_ERS_RESULT_NEED_RESET;
		else
			result = PCI_ERS_RESULT_RECOVERED;
	}

	/* If any device has a hard failure, then shut off everything. */
	if (result == PCI_ERS_RESULT_DISCONNECT) {
		printk(KERN_WARNING "EEH: Device driver gave up\n");
		goto hard_fail;
	}

	/* If any device called out for a reset, then reset the slot */
	if (result == PCI_ERS_RESULT_NEED_RESET) {
		rc = eeh_reset_device(frozen_pdn, NULL);
		if (rc) {
			printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
			goto hard_fail;
		}
		result = PCI_ERS_RESULT_NONE;
		pci_walk_bus(frozen_bus, eeh_report_reset, &result);
	}

	/* All devices should claim they have recovered by now. */
	if ((result != PCI_ERS_RESULT_RECOVERED) &&
	    (result != PCI_ERS_RESULT_NONE)) {
		printk(KERN_WARNING "EEH: Not recovered\n");
		goto hard_fail;
	}

	/* Tell all device drivers that they can resume operations */
	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);

	return frozen_pdn;
	
excess_failures:
	/*
	 * About 90% of all real-life EEH failures in the field
	 * are due to poorly seated PCI cards. Only 10% or so are
	 * due to actual, failed cards.
	 */
	printk(KERN_ERR
	   "EEH: PCI device at location=%s driver=%s pci addr=%s \n"
		"has failed %d times in the last hour "
		"and has been permanently disabled. \n"
		"Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str, frozen_pdn->eeh_freeze_count);
	goto perm_error;

hard_fail:
	printk(KERN_ERR
	   "EEH: Unable to recover from failure of PCI device "
	   "at location=%s driver=%s pci addr=%s \n"
	   "Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str);

perm_error:
	eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE);

	/* Notify all devices that they're about to go down. */
	pci_walk_bus(frozen_bus, eeh_report_failure, NULL);

	/* Shut down the device drivers for good. */
	pcibios_remove_pci_devices(frozen_bus);

	return NULL;
}
Exemplo n.º 27
0
/**
 * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
 * @dn device node
 * @dev pci device, if known
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
	int ret;
	int rets[3];
	unsigned long flags;
	struct pci_dn *pdn;
	enum pci_channel_state state;
	int rc = 0;

	total_mmio_ffs++;

	if (!eeh_subsystem_enabled)
		return 0;

	if (!dn) {
		no_dn++;
		return 0;
	}
	pdn = PCI_DN(dn);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
		ignored_check++;
#ifdef DEBUG
		printk ("EEH:ignored check (%x) for %s %s\n", 
		        pdn->eeh_mode, pci_name (dev), dn->full_name);
#endif
		return 0;
	}

	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
		no_cfg_addr++;
		return 0;
	}

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	spin_lock_irqsave(&confirm_error_lock, flags);
	rc = 1;
	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
		pdn->eeh_check_count ++;
		if (pdn->eeh_check_count >= EEH_MAX_FAILS) {
			printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n",
			        pdn->eeh_check_count);
			dump_stack();
			msleep(5000);
			
			/* re-read the slot reset state */
			if (read_slot_reset_state(pdn, rets) != 0)
				rets[0] = -1;	/* reset state unknown */

			/* If we are here, then we hit an infinite loop. Stop. */
			panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = read_slot_reset_state(pdn, rets);

	/* If the call to firmware failed, punt */
	if (ret != 0) {
		printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If EEH is not supported on this device, punt. */
	if (rets[1] != 1) {
		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If not the kind of error we know about, punt. */
	if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
		false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* Note that config-io to empty slots may fail;
	 * we recognize empty because they don't have children. */
	if ((rets[0] == 5) && (dn->child == NULL)) {
		false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	slot_resets++;
 
	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges. */
	eeh_mark_slot (dn, EEH_MODE_ISOLATED);
	spin_unlock_irqrestore(&confirm_error_lock, flags);

	state = pci_channel_io_normal;
	if ((rets[0] == 2) || (rets[0] == 4))
		state = pci_channel_io_frozen;
	if (rets[0] == 5)
		state = pci_channel_io_perm_failure;
	eeh_send_failure_event (dn, dev, state, rets[2]);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out. */
	if (rets[0] != 5) dump_stack();
	return 1;

dn_unlock:
	spin_unlock_irqrestore(&confirm_error_lock, flags);
	return rc;
}
Exemplo n.º 28
0
struct pci_dn * handle_eeh_events (struct eeh_event *event)
{
	struct device_node *frozen_dn;
	struct pci_dn *frozen_pdn;
	struct pci_bus *frozen_bus;
	int rc = 0;
	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
	const char *location, *pci_str, *drv_str;

	frozen_dn = find_device_pe(event->dn);
	frozen_bus = pcibios_find_pci_bus(frozen_dn);

	if (!frozen_dn) {

		location = (char *) get_property(event->dn, "ibm,loc-code", NULL);
		location = location ? location : "unknown";
		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
		                "for location=%s pci addr=%s\n",
		        location, pci_name(event->dev));
		return NULL;
	}
	location = (char *) get_property(frozen_dn, "ibm,loc-code", NULL);
	location = location ? location : "unknown";

	/* There are two different styles for coming up with the PE.
	 * In the old style, it was the highest EEH-capable device
	 * which was always an EADS pci bridge.  In the new style,
	 * there might not be any EADS bridges, and even when there are,
	 * the firmware marks them as "EEH incapable". So another
	 * two-step is needed to find the pci bus.. */
	if (!frozen_bus)
		frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);

	if (!frozen_bus) {
		printk(KERN_ERR "EEH: Cannot find PCI bus "
		        "for location=%s dn=%s\n",
		        location, frozen_dn->full_name);
		return NULL;
	}

#if 0
	/* We may get "permanent failure" messages on empty slots.
	 * These are false alarms. Empty slots have no child dn. */
	if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL))
		return;
#endif

	frozen_pdn = PCI_DN(frozen_dn);
	frozen_pdn->eeh_freeze_count++;

	if (frozen_pdn->pcidev) {
		pci_str = pci_name (frozen_pdn->pcidev);
		drv_str = pcid_name (frozen_pdn->pcidev);
	} else {
		pci_str = pci_name (event->dev);
		drv_str = pcid_name (event->dev);
	}
	
	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
		goto excess_failures;

	/* If the reset state is a '5' and the time to reset is 0 (infinity)
	 * or is more then 15 seconds, then mark this as a permanent failure.
	 */
	if ((event->state == pci_channel_io_perm_failure) &&
	    ((event->time_unavail <= 0) ||
	     (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000)))
		goto hard_fail;

	eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
	printk(KERN_WARNING
	   "EEH: This PCI device has failed %d times since last reboot: "
		"location=%s driver=%s pci addr=%s\n",
		frozen_pdn->eeh_freeze_count, location, drv_str, pci_str);

	/* Walk the various device drivers attached to this slot through
	 * a reset sequence, giving each an opportunity to do what it needs
	 * to accomplish the reset.  Each child gets a report of the
	 * status ... if any child can't handle the reset, then the entire
	 * slot is dlpar removed and added.
	 */
	pci_walk_bus(frozen_bus, eeh_report_error, &result);

	/* If all device drivers were EEH-unaware, then shut
	 * down all of the device drivers, and hope they
	 * go down willingly, without panicing the system.
	 */
	if (result == PCI_ERS_RESULT_NONE) {
		rc = eeh_reset_device(frozen_pdn, frozen_bus);
		if (rc)
			goto hard_fail;
	}

	/* If any device called out for a reset, then reset the slot */
	if (result == PCI_ERS_RESULT_NEED_RESET) {
		rc = eeh_reset_device(frozen_pdn, NULL);
		if (rc)
			goto hard_fail;
		pci_walk_bus(frozen_bus, eeh_report_reset, NULL);
	}

	/* If all devices reported they can proceed, the re-enable PIO */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		/* XXX Not supported; we brute-force reset the device */
		rc = eeh_reset_device(frozen_pdn, NULL);
		if (rc)
			goto hard_fail;
		pci_walk_bus(frozen_bus, eeh_report_reset, NULL);
	}

	/* Tell all device drivers that they can resume operations */
	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);

	return frozen_pdn;
	
excess_failures:
	/*
	 * About 90% of all real-life EEH failures in the field
	 * are due to poorly seated PCI cards. Only 10% or so are
	 * due to actual, failed cards.
	 */
	printk(KERN_ERR
	   "EEH: PCI device at location=%s driver=%s pci addr=%s \n"
		"has failed %d times and has been permanently disabled. \n"
		"Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str, frozen_pdn->eeh_freeze_count);
	goto perm_error;

hard_fail:
	printk(KERN_ERR
	   "EEH: Unable to recover from failure of PCI device "
	   "at location=%s driver=%s pci addr=%s \n"
	   "Please try reseating this device or replacing it.\n",
		location, drv_str, pci_str);

perm_error:
	eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */);

	/* Notify all devices that they're about to go down. */
	pci_walk_bus(frozen_bus, eeh_report_failure, NULL);

	/* Shut down the device drivers for good. */
	pcibios_remove_pci_devices(frozen_bus);

	return NULL;
}