Example #1
0
/**
 * genwqe_recover_card() - Stop the card, reset the bus and start it again
 * @cd:         genwqe device descriptor
 * @fatal_err:  non-zero if the card must not be touched via registers
 *
 * When fatal_err is set, register access is no longer possible, so the
 * SOFTRESET register write is skipped. genwqe_start() is likely to fail
 * in that situation and the caller has to handle that properly.
 *
 * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
 * and later genwqe_probe() for all virtual functions.
 *
 * Return: 0 on success, otherwise the non-zero code reported by
 *         genwqe_bus_reset() or the negative code from genwqe_start()
 */
static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
{
	struct pci_dev *pci_dev = cd->pci_dev;
	int err;

	genwqe_stop(cd);

	/*
	 * Keep the chip from being reloaded so that the FFDC data
	 * survives: write the SLU Reset Register with the CPLDReset
	 * field set to 0. Only possible while registers are reachable.
	 */
	if (fatal_err == 0) {
		cd->softreset = 0x70ull;
		__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
	}

	err = genwqe_bus_reset(cd);
	if (err) {
		dev_err(&pci_dev->dev,
			"[%s] err: card recovery impossible!\n", __func__);
		return err;
	}

	/* Only a negative result from genwqe_start() counts as failure */
	err = genwqe_start(cd);
	if (err >= 0)
		return 0;

	dev_err(&pci_dev->dev,
		"[%s] err: failed to launch device!\n", __func__);
	return err;
}
Example #2
0
/*
 * genwqe_tweak_hardware() - Apply bitstream specific register tweaks
 *
 * For development images, i.e. when the masked SLU unit configuration
 * value lies within 0x32000..0x33250 (inclusive), a warning is logged
 * and the FIRs are masked by writing all ones to
 * IO_APP_SEC_LEM_DEBUG_OVR and zero to IO_APP_ERR_ACT_MASK.
 * For all other images nothing is done.
 */
static void genwqe_tweak_hardware(struct genwqe_dev *cd)
{
	struct pci_dev *pci_dev = cd->pci_dev;
	unsigned long long level = cd->slu_unitcfg & 0xFFFF0ull;

	/* Nothing to tweak unless this is a development image */
	if (level < 0x32000ull || level > 0x33250ull)
		return;

	dev_warn(&pci_dev->dev,
		 "FIRs masked due to bitstream %016llx.%016llx\n",
		 cd->slu_unitcfg, cd->app_unitcfg);

	__genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR, 0xFFFFFFFFFFFFFFFFull);
	__genwqe_writeq(cd, IO_APP_ERR_ACT_MASK, 0x0000000000000000ull);
}
Example #3
0
/*
 * genwqe_reload_bistream() - reload card bitstream
 *
 * Stops the card, programs the SOFTRESET register and issues a
 * fundamental reset so that the card reloads its bitstream, then
 * starts the card again. A failing fundamental reset is only logged;
 * the card is started regardless.
 *
 * Return: 0 on success, the error code of genwqe_start() otherwise
 */
static int genwqe_reload_bistream(struct genwqe_dev *cd)
{
	int rc;
	struct pci_dev *pci_dev = cd->pci_dev;

	dev_info(&pci_dev->dev,
		 "[%s] resetting card for bitstream reload\n", __func__);

	genwqe_stop(cd);

	/*
	 * Request a CPLD reprogram from the 'next_bitstream' partition
	 * on the next PCIe hot or fundamental reset. Bits 0xc of the
	 * cached softreset value are carried over, 0x70 is always set.
	 */
	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
			(cd->softreset & 0xcull) | 0x70ull);

	/*
	 * The fundamental reset may fail simply because the arch does
	 * not support it. Log the problem and still try to bring the
	 * card back up.
	 */
	rc = genwqe_pci_fundamental_reset(pci_dev);
	if (rc != 0)
		dev_err(&pci_dev->dev,
			"[%s] err: failed to reset card for bitstream reload\n",
			__func__);

	rc = genwqe_start(cd);
	if (rc == 0) {
		dev_info(&pci_dev->dev, "[%s] card reloaded\n", __func__);
		return 0;
	}

	dev_err(&pci_dev->dev,
		"[%s] err: cannot start card services! (err=%d)\n",
		__func__, rc);
	return rc;
}
Example #4
0
/*
 * next_bitstream_store() - sysfs store handler selecting the next bitstream
 *
 * Parses an integer partition number from buf. Partition 0 maps to the
 * softreset value 0x78, partition 1 to 0x7c. The chosen value is cached
 * in cd->softreset and written to the SOFTRESET register.
 *
 * Return: count on success, -EINVAL if buf is not a valid integer or
 *         the partition is neither 0 nor 1
 */
static ssize_t next_bitstream_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf, size_t count)
{
	struct genwqe_dev *cd = dev_get_drvdata(dev);
	int partition;

	if (kstrtoint(buf, 0, &partition) < 0)
		return -EINVAL;

	/* Reject unknown partitions before touching any state */
	if (partition == 0x0)
		cd->softreset = 0x78;
	else if (partition == 0x1)
		cd->softreset = 0x7c;
	else
		return -EINVAL;

	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
	return count;
}
Example #5
0
/**
 * genwqe_fir_checking() - Check the fault isolation registers of the card
 * @cd:         genwqe device descriptor
 *
 * Reads the global FIR (GFIR) and, if it is non-zero, walks all units:
 * for every unit with a non-zero primary FIR the primary FIR/FEC are
 * logged, and for every bit set in the primary FIR the matching
 * secondary FIR/FEC are logged. If the pass was entered without any
 * GFIR_ERR_TRIGGER bit set, the logged secondary and primary FIR bits
 * are written back to clear them. If a GFIR_ERR_TRIGGER bit shows up
 * while such a pass is running, the whole check starts over; after 16
 * passes the function gives up.
 *
 * If this code works ok, can be tried out with help of the genwqe_poke tool:
 *   sudo ./tools/genwqe_poke 0x8 0xfefefefefef
 *
 * Now the relevant FIRs/sFIRs should be printed out and the driver should
 * invoke recovery (devices are removed and readded).
 *
 * Return: 0 if the GFIR is zero; IO_ILLEGAL_VALUE if any register read
 *         returned IO_ILLEGAL_VALUE or the pass limit was exceeded;
 *         otherwise the GFIR masked with GFIR_ERR_TRIGGER as sampled at
 *         the start of the last pass.
 */
static u64 genwqe_fir_checking(struct genwqe_dev *cd)
{
	int j, iterations = 0;
	u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
	u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
	struct pci_dev *pci_dev = cd->pci_dev;

	/* Entry point of one complete pass; re-entered via goto below */
 healthMonitor:
	iterations++;
	if (iterations > 16) {
		/* bound the number of restarts so we cannot spin forever */
		dev_err(&pci_dev->dev, "* exit looping after %d times\n",
			iterations);
		goto fatal_error;
	}

	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
	if (gfir != 0x0)
		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
				    IO_SLC_CFGREG_GFIR, gfir);
	if (gfir == IO_ILLEGAL_VALUE)
		goto fatal_error;

	/*
	 * Return early when no GFIR bit is on. This prevents continuous
	 * printout e.g. for the following bug:
	 *   FIR set without a 2ndary FIR/FIR cannot be cleared
	 * Comment out the following if to get the prints:
	 */
	if (gfir == 0)
		return 0;

	/* remembered for the whole pass; decides whether FIRs get cleared */
	gfir_masked = gfir & GFIR_ERR_TRIGGER;  /* fatal errors */

	for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */

		/* read the primary FIR (pfir) */
		fir_addr = (uid << 24) + 0x08;
		fir = __genwqe_readq(cd, fir_addr);
		if (fir == 0x0)
			continue;  /* no error in this unit */

		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
		if (fir == IO_ILLEGAL_VALUE)
			goto fatal_error;

		/* read primary FEC */
		fec_addr = (uid << 24) + 0x18;
		fec = __genwqe_readq(cd, fec_addr);

		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
		if (fec == IO_ILLEGAL_VALUE)
			goto fatal_error;

		/* visit each of the 64 primary FIR bits; mask selects bit j */
		for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {

			/* secondary fir empty, skip it */
			if ((fir & mask) == 0x0)
				continue;

			/* secondary FIR for primary bit j */
			sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
			sfir = __genwqe_readq(cd, sfir_addr);

			if (sfir == IO_ILLEGAL_VALUE)
				goto fatal_error;
			dev_err(&pci_dev->dev,
				"* 0x%08x 0x%016llx\n", sfir_addr, sfir);

			/* secondary FEC for primary bit j */
			sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
			sfec = __genwqe_readq(cd, sfec_addr);

			if (sfec == IO_ILLEGAL_VALUE)
				goto fatal_error;
			dev_err(&pci_dev->dev,
				"* 0x%08x 0x%016llx\n", sfec_addr, sfec);

			/* re-sample the GFIR before deciding to clear anything */
			gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
			if (gfir == IO_ILLEGAL_VALUE)
				goto fatal_error;

			/* gfir turned on during routine! get out and
			   start over. */
			if ((gfir_masked == 0x0) &&
			    (gfir & GFIR_ERR_TRIGGER)) {
				goto healthMonitor;
			}

			/* do not clear if we entered with a fatal gfir */
			if (gfir_masked == 0x0) {

				/* NEW clear by mask the logged bits */
				sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
				__genwqe_writeq(cd, sfir_addr, sfir);

				dev_dbg(&pci_dev->dev,
					"[HM] Clearing  2ndary FIR 0x%08x with 0x%016llx\n",
					sfir_addr, sfir);

				/*
				 * Note: these cannot be error FIRs,
				 * since gfir_masked is 0 after sfir
				 * was read. It is also safe to do
				 * this write if sfir=0, which just
				 * means there is no secondary FIR;
				 * the primary still needs to be
				 * cleared.
				 */

				/* clear by mask the logged bit. */
				fir_clr_addr = (uid << 24) + 0x10;
				__genwqe_writeq(cd, fir_clr_addr, mask);

				dev_dbg(&pci_dev->dev,
					"[HM] Clearing primary FIR 0x%08x with 0x%016llx\n",
					fir_clr_addr, mask);
			}
		}
	}
	/* final GFIR sample after all units have been processed */
	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
	if (gfir == IO_ILLEGAL_VALUE)
		goto fatal_error;

	if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
		/*
		 * Check once more that it didn't go on after all the
		 * FIRS were cleared.
		 */
		dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
			iterations);
		goto healthMonitor;
	}
	return gfir_masked;

	/* common exit for unreadable registers and the pass limit */
 fatal_error:
	return IO_ILLEGAL_VALUE;
}
Example #6
0
/*
 * genwqe_start_collect_ffdc() - Capture FFDC data while traps are stopped
 *
 * Allocates the FFDC buffers (an allocation failure is acceptable and
 * therefore not checked), stops the traps, reads the debug registers
 * and the FFDC buffers of units 0..2, and finally restarts the traps.
 */
static void genwqe_start_collect_ffdc(struct genwqe_dev *cd)
{
	genwqe_ffdc_buffs_alloc(cd);
	genwqe_stop_traps(cd);

	/* Collect registers e.g. FIRs, UNITIDs, traces ... */
	genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
			      cd->ffdc[GENWQE_DBG_REGS].entries, 0);

	genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
			      cd->ffdc[GENWQE_DBG_UNIT0].regs,
			      cd->ffdc[GENWQE_DBG_UNIT0].entries);
	genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
			      cd->ffdc[GENWQE_DBG_UNIT1].regs,
			      cd->ffdc[GENWQE_DBG_UNIT1].entries);
	genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
			      cd->ffdc[GENWQE_DBG_UNIT2].regs,
			      cd->ffdc[GENWQE_DBG_UNIT2].entries);

	genwqe_start_traps(cd);
}

/*
 * genwqe_start() - Bring the card into operation
 *
 * Reads the card IDs, and on a privileged function collects FFDC data.
 * If the card is in GENWQE_CARD_FATAL_ERROR state, a chip reload is
 * requested via the SOFTRESET register followed by a bus reset and a
 * second ID read. Afterwards the service layer is set up, privileged
 * functions apply the hardware tweaks and program the PF/VF job timers,
 * and finally the character device is created.
 *
 * Return: 0 on success; the code from genwqe_read_ids() if the very
 *         first ID read fails; -EIO for every later failure
 */
static int genwqe_start(struct genwqe_dev *cd)
{
	struct pci_dev *pci_dev = cd->pci_dev;
	int rc;

	rc = genwqe_read_ids(cd);
	if (rc != 0)
		return rc;

	if (genwqe_is_privileged(cd)) {
		genwqe_start_collect_ffdc(cd);

		if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
			dev_warn(&pci_dev->dev,
				 "[%s] chip reload/recovery!\n", __func__);

			/*
			 * Stealth Mode: Reload chip on either hot
			 * reset or PERST.
			 */
			cd->softreset = 0x7Cull;
			__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
					cd->softreset);

			rc = genwqe_bus_reset(cd);
			if (rc) {
				dev_err(&pci_dev->dev,
					"[%s] err: bus reset failed!\n",
					__func__);
				goto out;
			}

			/*
			 * The bitstream load may have failed, so the
			 * IDs have to be read again.
			 */
			rc = genwqe_read_ids(cd);
			if (rc != 0)
				goto out;
		}
	}

	/* Setting up the service layer does a reset to the card */
	rc = genwqe_setup_service_layer(cd);
	if (rc) {
		dev_err(&pci_dev->dev,
			"[%s] err: could not setup servicelayer!\n", __func__);
		goto out;
	}

	/* This code is running _after_ the reset */
	if (genwqe_is_privileged(cd)) {
		genwqe_tweak_hardware(cd);

		genwqe_setup_pf_jtimer(cd);
		genwqe_setup_vf_jtimer(cd);
	}

	rc = genwqe_device_create(cd);
	if (rc >= 0)
		return 0;

	dev_err(&pci_dev->dev,
		"err: chdev init failed! (err=%d)\n", rc);
	genwqe_release_service_layer(cd);

	/* Shared failure exit: the specific error code is not propagated */
 out:
	if (genwqe_is_privileged(cd))
		genwqe_ffdc_buffs_free(cd);
	return -EIO;
}