/** * genwqe_recover_card() - Try to recover the card if it is possible * * If fatal_err is set no register access is possible anymore. It is * likely that genwqe_start fails in that situation. Proper error * handling is required in this case. * * genwqe_bus_reset() will cause the pci code to call genwqe_remove() * and later genwqe_probe() for all virtual functions. */ static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err) { int rc; struct pci_dev *pci_dev = cd->pci_dev; genwqe_stop(cd); /* * Make sure chip is not reloaded to maintain FFDC. Write SLU * Reset Register, CPLDReset field to 0. */ if (!fatal_err) { cd->softreset = 0x70ull; __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); } rc = genwqe_bus_reset(cd); if (rc != 0) { dev_err(&pci_dev->dev, "[%s] err: card recovery impossible!\n", __func__); return rc; } rc = genwqe_start(cd); if (rc < 0) { dev_err(&pci_dev->dev, "[%s] err: failed to launch device!\n", __func__); return rc; } return 0; }
static void genwqe_tweak_hardware(struct genwqe_dev *cd) { struct pci_dev *pci_dev = cd->pci_dev; /* Mask FIRs for development images */ if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) && ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) { dev_warn(&pci_dev->dev, "FIRs masked due to bitstream %016llx.%016llx\n", cd->slu_unitcfg, cd->app_unitcfg); __genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR, 0xFFFFFFFFFFFFFFFFull); __genwqe_writeq(cd, IO_APP_ERR_ACT_MASK, 0x0000000000000000ull); } }
/* * genwqe_reload_bistream() - reload card bitstream * * Set the appropriate register and call fundamental reset to reaload the card * bitstream. * * Return: 0 on success, error code otherwise */ static int genwqe_reload_bistream(struct genwqe_dev *cd) { struct pci_dev *pci_dev = cd->pci_dev; int rc; dev_info(&pci_dev->dev, "[%s] resetting card for bitstream reload\n", __func__); genwqe_stop(cd); /* * Cause a CPLD reprogram with the 'next_bitstream' * partition on PCIe hot or fundamental reset */ __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, (cd->softreset & 0xcull) | 0x70ull); rc = genwqe_pci_fundamental_reset(pci_dev); if (rc) { /* * A fundamental reset failure can be caused * by lack of support on the arch, so we just * log the error and try to start the card * again. */ dev_err(&pci_dev->dev, "[%s] err: failed to reset card for bitstream reload\n", __func__); } rc = genwqe_start(cd); if (rc) { dev_err(&pci_dev->dev, "[%s] err: cannot start card services! (err=%d)\n", __func__, rc); return rc; } dev_info(&pci_dev->dev, "[%s] card reloaded\n", __func__); return 0; }
/**
 * next_bitstream_store() - Select the partition used for the next reload
 * @dev:   device whose driver data is the GenWQE device descriptor
 * @attr:  device attribute being written (unused)
 * @buf:   user supplied text holding the partition number
 * @count: number of bytes in @buf
 *
 * Accepts partition 0 or 1, records the matching soft-reset value in the
 * device descriptor and writes it to the soft-reset register.
 *
 * Return: @count on success, -EINVAL if @buf is not an integer or names
 *         an unknown partition
 */
static ssize_t next_bitstream_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf, size_t count)
{
	struct genwqe_dev *cd = dev_get_drvdata(dev);
	int partition;

	if (kstrtoint(buf, 0, &partition) < 0)
		return -EINVAL;

	if (partition == 0x0)
		cd->softreset = 0x78;
	else if (partition == 0x1)
		cd->softreset = 0x7c;
	else
		return -EINVAL;

	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
	return count;
}
/** * genwqe_fir_checking() - Check the fault isolation registers of the card * * If this code works ok, can be tried out with help of the genwqe_poke tool: * sudo ./tools/genwqe_poke 0x8 0xfefefefefef * * Now the relevant FIRs/sFIRs should be printed out and the driver should * invoke recovery (devices are removed and readded). */ static u64 genwqe_fir_checking(struct genwqe_dev *cd) { int j, iterations = 0; u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec; u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr; struct pci_dev *pci_dev = cd->pci_dev; healthMonitor: iterations++; if (iterations > 16) { dev_err(&pci_dev->dev, "* exit looping after %d times\n", iterations); goto fatal_error; } gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); if (gfir != 0x0) dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", IO_SLC_CFGREG_GFIR, gfir); if (gfir == IO_ILLEGAL_VALUE) goto fatal_error; /* * Avoid printing when to GFIR bit is on prevents contignous * printout e.g. for the following bug: * FIR set without a 2ndary FIR/FIR cannot be cleared * Comment out the following if to get the prints: */ if (gfir == 0) return 0; gfir_masked = gfir & GFIR_ERR_TRIGGER; /* fatal errors */ for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */ /* read the primary FIR (pfir) */ fir_addr = (uid << 24) + 0x08; fir = __genwqe_readq(cd, fir_addr); if (fir == 0x0) continue; /* no error in this unit */ dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir); if (fir == IO_ILLEGAL_VALUE) goto fatal_error; /* read primary FEC */ fec_addr = (uid << 24) + 0x18; fec = __genwqe_readq(cd, fec_addr); dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec); if (fec == IO_ILLEGAL_VALUE) goto fatal_error; for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) { /* secondary fir empty, skip it */ if ((fir & mask) == 0x0) continue; sfir_addr = (uid << 24) + 0x100 + 0x08 * j; sfir = __genwqe_readq(cd, sfir_addr); if (sfir == IO_ILLEGAL_VALUE) goto fatal_error; dev_err(&pci_dev->dev, "* 
0x%08x 0x%016llx\n", sfir_addr, sfir); sfec_addr = (uid << 24) + 0x300 + 0x08 * j; sfec = __genwqe_readq(cd, sfec_addr); if (sfec == IO_ILLEGAL_VALUE) goto fatal_error; dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", sfec_addr, sfec); gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); if (gfir == IO_ILLEGAL_VALUE) goto fatal_error; /* gfir turned on during routine! get out and start over. */ if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) { goto healthMonitor; } /* do not clear if we entered with a fatal gfir */ if (gfir_masked == 0x0) { /* NEW clear by mask the logged bits */ sfir_addr = (uid << 24) + 0x100 + 0x08 * j; __genwqe_writeq(cd, sfir_addr, sfir); dev_dbg(&pci_dev->dev, "[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n", sfir_addr, sfir); /* * note, these cannot be error-Firs * since gfir_masked is 0 after sfir * was read. Also, it is safe to do * this write if sfir=0. Still need to * clear the primary. This just means * there is no secondary FIR. */ /* clear by mask the logged bit. */ fir_clr_addr = (uid << 24) + 0x10; __genwqe_writeq(cd, fir_clr_addr, mask); dev_dbg(&pci_dev->dev, "[HM] Clearing primary FIR 0x%08x with 0x%016llx\n", fir_clr_addr, mask); } } } gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); if (gfir == IO_ILLEGAL_VALUE) goto fatal_error; if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) { /* * Check once more that it didn't go on after all the * FIRS were cleared. */ dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n", iterations); goto healthMonitor; } return gfir_masked; fatal_error: return IO_ILLEGAL_VALUE; }
static int genwqe_start(struct genwqe_dev *cd) { int err; struct pci_dev *pci_dev = cd->pci_dev; err = genwqe_read_ids(cd); if (err) return err; if (genwqe_is_privileged(cd)) { /* do this after the tweaks. alloc fail is acceptable */ genwqe_ffdc_buffs_alloc(cd); genwqe_stop_traps(cd); /* Collect registers e.g. FIRs, UNITIDs, traces ... */ genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs, cd->ffdc[GENWQE_DBG_REGS].entries, 0); genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0, cd->ffdc[GENWQE_DBG_UNIT0].regs, cd->ffdc[GENWQE_DBG_UNIT0].entries); genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1, cd->ffdc[GENWQE_DBG_UNIT1].regs, cd->ffdc[GENWQE_DBG_UNIT1].entries); genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2, cd->ffdc[GENWQE_DBG_UNIT2].regs, cd->ffdc[GENWQE_DBG_UNIT2].entries); genwqe_start_traps(cd); if (cd->card_state == GENWQE_CARD_FATAL_ERROR) { dev_warn(&pci_dev->dev, "[%s] chip reload/recovery!\n", __func__); /* * Stealth Mode: Reload chip on either hot * reset or PERST. */ cd->softreset = 0x7Cull; __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); err = genwqe_bus_reset(cd); if (err != 0) { dev_err(&pci_dev->dev, "[%s] err: bus reset failed!\n", __func__); goto out; } /* * Re-read the IDs because * it could happen that the bitstream load * failed! */ err = genwqe_read_ids(cd); if (err) goto out; } } err = genwqe_setup_service_layer(cd); /* does a reset to the card */ if (err != 0) { dev_err(&pci_dev->dev, "[%s] err: could not setup servicelayer!\n", __func__); err = -ENODEV; goto out; } if (genwqe_is_privileged(cd)) { /* code is running _after_ reset */ genwqe_tweak_hardware(cd); genwqe_setup_pf_jtimer(cd); genwqe_setup_vf_jtimer(cd); } err = genwqe_device_create(cd); if (err < 0) { dev_err(&pci_dev->dev, "err: chdev init failed! (err=%d)\n", err); goto out_release_service_layer; } return 0; out_release_service_layer: genwqe_release_service_layer(cd); out: if (genwqe_is_privileged(cd)) genwqe_ffdc_buffs_free(cd); return -EIO; }