コード例 #1
0
ファイル: bte_error.c プロジェクト: 0-T-0/ps4-linux
/*
 * Wait until all BTE related CRBs are completed
 * and then reset the interfaces.
 */
int shub1_bte_error_handler(unsigned long _nodepda)
{
	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
	nasid_t nasid;
	int i;
	int valid_crbs;
	ii_imem_u_t imem;	/* II IMEM Register */
	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
	ii_ibcr_u_t ibcr;
	ii_icmr_u_t icmr;
	ii_ieclr_u_t ieclr;

	BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda,
		    smp_processor_id()));

	if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
			    smp_processor_id()));
		return 1;
	}

	/* Determine information about our hub */
	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);

	/*
	 * A BTE transfer can use multiple CRBs.  We need to make sure
	 * that all the BTE CRBs are complete (or timed out) before
	 * attempting to clean up the error.  Resetting the BTE while
	 * there are still BTE CRBs active will hang the BTE.
	 * We should look at all the CRBs to see if they are allocated
	 * to the BTE and see if they are still active.  When none
	 * are active, we can continue with the cleanup.
	 *
	 * We also want to make sure that the local NI port is up.
	 * When a router resets the NI port can go down, while it
	 * goes through the LLP handshake, but then comes back up.
	 */
	icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR);
	if (icmr.ii_icmr_fld_s.i_crb_mark != 0) {
		/*
		 * There are errors which still need to be cleaned up by
		 * hubiio_crb_error_handler
		 */
		mod_timer(recovery_timer, jiffies + (HZ * 5));
		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
			    smp_processor_id()));
		return 1;
	}
	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {

		valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld;

		for (i = 0; i < IIO_NUM_CRBS; i++) {
			if (!((1 << i) & valid_crbs)) {
				/* This crb was not marked as valid, ignore */
				continue;
			}
			icrbd.ii_icrb0_d_regval =
			    REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
			if (icrbd.d_bteop) {
				mod_timer(recovery_timer, jiffies + (HZ * 5));
				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
					    err_nodepda, smp_processor_id(),
					    i));
				return 1;
			}
		}
	}

	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
	/* Re-enable both bte interfaces */
	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);

	/* Clear BTE0/1 error bits */
	ieclr.ii_ieclr_regval = 0;
	if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS)
		ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1;
	if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS)
		ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1;
	REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval);

	/* Reinitialize both BTE state machines. */
	ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR);
	ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);

	del_timer(recovery_timer);
	return 0;
}
コード例 #2
0
ファイル: shuberror.c プロジェクト: sarnobat/knoppix
int
hubiio_crb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo)
{
	cnodeid_t	cnode;
	nasid_t		nasid;
	ii_icrb0_a_u_t		icrba;		/* II CRB Register A */
	ii_icrb0_b_u_t		icrbb;		/* II CRB Register B */
	ii_icrb0_c_u_t		icrbc;		/* II CRB Register C */
	ii_icrb0_d_u_t		icrbd;		/* II CRB Register D */
	ii_icrb0_e_u_t		icrbe;		/* II CRB Register D */
	int		i;
	int		num_errors = 0;	/* Num of errors handled */
	ioerror_t	ioerror;
	int		rc;

	nasid = hinfo->h_nasid;
	cnode = NASID_TO_COMPACT_NODEID(nasid);

	/*
	 * XXX - Add locking for any recovery actions
	 */
	/*
	 * Scan through all CRBs in the Hub, and handle the errors
	 * in any of the CRBs marked.
	 */
	for (i = 0; i < IIO_NUM_CRBS; i++) {
		/* Check this crb entry to see if it is in error. */
		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));

		if (icrbb.b_mark == 0) {
			continue;
		}

		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));

		IOERROR_INIT(&ioerror);

		/* read other CRB error registers. */
		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));

		IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

		/* Check if this error is due to BTE operation,
		* and handle it separately.
		*/
		if (icrbd.d_bteop ||
			((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
			icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
			(icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
			icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))){

			int bte_num;

			if (icrbd.d_bteop)
				bte_num = icrbc.c_btenum;
			else /* b_initiator bit 2 gives BTE number */
				bte_num = (icrbb.b_initiator & 0x4) >> 2;

			hubiio_crb_free(hinfo, i);

			bte_crb_error_handler(hub_v, bte_num,
					      i, &ioerror,
					      icrbd.d_bteop);
			num_errors++;
			continue;
		}

		/*
		 * XXX
		 * Assuming the only other error that would reach here is
		 * crosstalk errors. 
		 * If CRB times out on a message from Xtalk, it changes 
		 * the message type to CRB. 
		 *
		 * If we get here due to other errors (SN0net/CRB)
		 * what's the action ?
		 */

		/*
		 * Pick out the useful fields in CRB, and
		 * tuck them away into ioerror structure.
		 */
		IOERROR_SETVALUE(&ioerror,xtalkaddr,icrba.a_addr << IIO_ICRB_ADDR_SHFT);
		IOERROR_SETVALUE(&ioerror,widgetnum,icrba.a_sidn);


		if (icrba.a_iow){
			/*
			 * XXX We shouldn't really have BRIDGE-specific code
			 * here, but alas....
			 *
			 * The BRIDGE (or XBRIDGE) sets the upper bit of TNUM
			 * to indicate a WRITE operation.  It sets the next
			 * bit to indicate an INTERRUPT operation.  The bottom
			 * 3 bits of TNUM indicate which device was responsible.
			 */
			IOERROR_SETVALUE(&ioerror,widgetdev,
					 TNUM_TO_WIDGET_DEV(icrba.a_tnum));
			/*
			* The encoding of TNUM (see comments above) is
			* different for PIC. So we'll save TNUM here and
			* deal with the differences later when we can
			* determine if we're using a Bridge or the PIC.
			*
			* XXX:  We may be able to remove saving the widgetdev
			* above and just sort it out of TNUM later.
			*/
			IOERROR_SETVALUE(&ioerror, tnum, icrba.a_tnum);

		}
		if (icrbb.b_error) {
		    /*
		     * CRB 'i' has some error. Identify the type of error,
		     * and try to handle it.
		     *
		     */
		    switch(icrbb.b_ecode) {
			case IIO_ICRB_ECODE_PERR:
			case IIO_ICRB_ECODE_WERR:
			case IIO_ICRB_ECODE_AERR:
			case IIO_ICRB_ECODE_PWERR:
			case IIO_ICRB_ECODE_TOUT:
			case IIO_ICRB_ECODE_XTERR:
			    printk("Shub II CRB %d: error %s on hub cnodeid: %d",
				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
			    /*
			     * Any sort of write error is mostly due
			     * bad programming (Note it's not a timeout.)
			     * So, invoke hub_iio_error_handler with
			     * appropriate information.
			     */
			    IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

			    /* Go through the error bit lookup phase */
			    if (error_state_set(hub_v, ERROR_STATE_LOOKUP) ==
				    ERROR_RETURN_CODE_CANNOT_SET_STATE)
				return(IOERROR_UNHANDLED);
			    rc = hub_ioerror_handler(
				    hub_v,
				    DMA_WRITE_ERROR,
				    MODE_DEVERROR,
				    &ioerror);
			    if (rc == IOERROR_HANDLED) {
				rc = hub_ioerror_handler(
					hub_v,
					DMA_WRITE_ERROR,
					MODE_DEVREENABLE,
					&ioerror);
			    }else {
				printk("Unable to handle %s on hub %d",
					hubiio_crb_errors[icrbb.b_ecode],
					cnode);
				/* panic; */
			    }
			    /* Go to Next error */
			    print_crb_fields(i, icrba, icrbb, icrbc,
				    icrbd, icrbe);
			    hubiio_crb_free(hinfo, i);
			    continue;
			case IIO_ICRB_ECODE_PRERR:
			case IIO_ICRB_ECODE_DERR:
			    printk("Shub II CRB %d: error %s on hub : %d",
				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
			    /* panic */
			default:
			    printk("Shub II CRB error (code : %d) on hub : %d",
				    icrbb.b_ecode, cnode);
			    /* panic */
		    }
		} 
		/*
		 * Error is not indicated via the errcode field
		 * Check other error indications in this register.
		 */
		if (icrbb.b_xerr) {
		    printk("Shub II CRB %d: Xtalk Packet with error bit set to hub %d",
			    i, cnode);
		    /* panic */
		}
		if (icrbb.b_lnetuce) {
		    printk("Shub II CRB %d: Uncorrectable data error detected on data "
			    " from NUMAlink to node %d",
			    i, cnode);
		    /* panic */
		}
		print_crb_fields(i, icrba, icrbb, icrbc, icrbd, icrbe);





		if (icrbb.b_error) {
		/* 
		 * CRB 'i' has some error. Identify the type of error,
		 * and try to handle it.
		 */
		switch(icrbb.b_ecode) {
		case IIO_ICRB_ECODE_PERR:
		case IIO_ICRB_ECODE_WERR:
		case IIO_ICRB_ECODE_AERR:
		case IIO_ICRB_ECODE_PWERR:

			printk("%s on hub cnodeid: %d",
				hubiio_crb_errors[icrbb.b_ecode], cnode);
			/*
			 * Any sort of write error is mostly due
			 * bad programming (Note it's not a timeout.)
			 * So, invoke hub_iio_error_handler with
			 * appropriate information.
			 */
			IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

			rc = hub_ioerror_handler(
					hub_v, 
					DMA_WRITE_ERROR, 
					MODE_DEVERROR, 
					&ioerror);

                        if (rc == IOERROR_HANDLED) {
                                rc = hub_ioerror_handler(
                                        hub_v,
                                        DMA_WRITE_ERROR,
                                        MODE_DEVREENABLE,
                                        &ioerror);
                                ASSERT(rc == IOERROR_HANDLED);
                        }else {

				panic("Unable to handle %s on hub %d",
					hubiio_crb_errors[icrbb.b_ecode],
					cnode);
				/*NOTREACHED*/
			}
			/* Go to Next error */
			hubiio_crb_free(hinfo, i);
			continue;

		case IIO_ICRB_ECODE_PRERR:

                case IIO_ICRB_ECODE_TOUT:
                case IIO_ICRB_ECODE_XTERR:

		case IIO_ICRB_ECODE_DERR:
			panic("Fatal %s on hub : %d",
				hubiio_crb_errors[icrbb.b_ecode], cnode);
			/*NOTREACHED*/
		
		default:
			panic("Fatal error (code : %d) on hub : %d",
				icrbb.b_ecode, cnode);
			/*NOTREACHED*/

		}
		} 	/* if (icrbb.b_error) */	

		/*
		 * Error is not indicated via the errcode field 
		 * Check other error indications in this register.
		 */
		
		if (icrbb.b_xerr) {
			panic("Xtalk Packet with error bit set to hub %d",
				cnode);
			/*NOTREACHED*/
		}

		if (icrbb.b_lnetuce) {
			panic("Uncorrectable data error detected on data "
				" from Craylink to node %d",
				cnode);
			/*NOTREACHED*/
		}

	}
コード例 #3
0
ファイル: huberror.c プロジェクト: nearffxx/xpenology
void hubiio_crb_error_handler(struct hubdev_info *hubdev_info)
{
	nasid_t nasid;
	ii_icrb0_a_u_t icrba;	/* II CRB Register A */
	ii_icrb0_b_u_t icrbb;	/* II CRB Register B */
	ii_icrb0_c_u_t icrbc;	/* II CRB Register C */
	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
	ii_icrb0_e_u_t icrbe;	/* II CRB Register D */
	int i;
	int num_errors = 0;	/* Num of errors handled */
	ioerror_t ioerror;

	nasid = hubdev_info->hdi_nasid;

	/*
	 * XXX - Add locking for any recovery actions
	 */
	/*
	 * Scan through all CRBs in the Hub, and handle the errors
	 * in any of the CRBs marked.
	 */
	for (i = 0; i < IIO_NUM_CRBS; i++) {
		/* Check this crb entry to see if it is in error. */
		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));

		if (icrbb.b_mark == 0) {
			continue;
		}

		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));

		IOERROR_INIT(&ioerror);

		/* read other CRB error registers. */
		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));

		IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode);

		/* Check if this error is due to BTE operation,
		 * and handle it separately.
		 */
		if (icrbd.d_bteop ||
		    ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
		      icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
		     (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
		      icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) {

			int bte_num;

			if (icrbd.d_bteop)
				bte_num = icrbc.c_btenum;
			else	/* b_initiator bit 2 gives BTE number */
				bte_num = (icrbb.b_initiator & 0x4) >> 2;

			hubiio_crb_free(hubdev_info, i);

			bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num,
					      i, &ioerror, icrbd.d_bteop);
			num_errors++;
			continue;
		}
	}
コード例 #4
0
ファイル: bte_error.c プロジェクト: 12019/hg556a_source
/*
 * Second part error handler.  Wait until all BTE related CRBs are completed
 * and then reset the interfaces.
 */
void
bte_error_handler(unsigned long _nodepda)
{
	struct nodepda_s *err_nodepda = (struct nodepda_s *) _nodepda;
	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
	nasid_t nasid;
	int i;
	int valid_crbs;
	unsigned long irq_flags;
	volatile u64 *notify;
	bte_result_t bh_error;
	ii_imem_u_t imem;	/* II IMEM Register */
	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
	ii_ibcr_u_t ibcr;
	ii_icmr_u_t icmr;
	ii_ieclr_u_t ieclr;


	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
		    smp_processor_id()));

	spin_lock_irqsave(recovery_lock, irq_flags);

	if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
			    smp_processor_id()));
		spin_unlock_irqrestore(recovery_lock, irq_flags);
		return;
	}
	/*
	 * Lock all interfaces on this node to prevent new transfers
	 * from being queued.
	 */
	for (i = 0; i < BTES_PER_NODE; i++) {
		if (err_nodepda->bte_if[i].cleanup_active) {
			continue;
		}
		spin_lock(&err_nodepda->bte_if[i].spinlock);
		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
			    smp_processor_id(), i));
		err_nodepda->bte_if[i].cleanup_active = 1;
	}

	/* Determine information about our hub */
	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);


	/*
	 * A BTE transfer can use multiple CRBs.  We need to make sure
	 * that all the BTE CRBs are complete (or timed out) before
	 * attempting to clean up the error.  Resetting the BTE while
	 * there are still BTE CRBs active will hang the BTE.
	 * We should look at all the CRBs to see if they are allocated
	 * to the BTE and see if they are still active.  When none
	 * are active, we can continue with the cleanup.
	 *
	 * We also want to make sure that the local NI port is up.
	 * When a router resets the NI port can go down, while it
	 * goes through the LLP handshake, but then comes back up.
	 */
	icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR);
	if (icmr.ii_icmr_fld_s.i_crb_mark != 0) {
		/*
		 * There are errors which still need to be cleaned up by
		 * hubiio_crb_error_handler
		 */
		mod_timer(recovery_timer, HZ * 5);
		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
			    smp_processor_id()));
		spin_unlock_irqrestore(recovery_lock, irq_flags);
		return;
	}
	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {

		valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld;

		for (i = 0; i < IIO_NUM_CRBS; i++) {
			if (!((1 << i) & valid_crbs)) {
				/* This crb was not marked as valid, ignore */
				continue;
			}
			icrbd.ii_icrb0_d_regval =
			    REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
			if (icrbd.d_bteop) {
				mod_timer(recovery_timer, HZ * 5);
				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
					 err_nodepda, smp_processor_id(), i));
				spin_unlock_irqrestore(recovery_lock,
						       irq_flags);
				return;
			}
		}
	}


	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda,
		    smp_processor_id()));
	/* Reenable both bte interfaces */
	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);

	/* Clear IBLS0/1 error bits */
	ieclr.ii_ieclr_regval = 0;
	if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS)
		ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1;
	if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS)
                ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1;
	REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval);

	/* Reinitialize both BTE state machines. */
	ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR);
	ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);


	for (i = 0; i < BTES_PER_NODE; i++) {
		bh_error = err_nodepda->bte_if[i].bh_error;
		if (bh_error != BTE_SUCCESS) {
			/* There is an error which needs to be notified */
			notify = err_nodepda->bte_if[i].most_rcnt_na;
			BTE_PRINTK(("cnode %d bte %d error=0x%lx\n",
				    err_nodepda->bte_if[i].bte_cnode,
				    err_nodepda->bte_if[i].bte_num,
				    IBLS_ERROR | (u64) bh_error));
			*notify = IBLS_ERROR | bh_error;
			err_nodepda->bte_if[i].bh_error = BTE_SUCCESS;
		}

		err_nodepda->bte_if[i].cleanup_active = 0;
		BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda,
			    smp_processor_id(), i));
		spin_unlock(&err_nodepda->bte_if[i].spinlock);
	}

	del_timer(recovery_timer);

	spin_unlock_irqrestore(recovery_lock, irq_flags);
}