Exemple #1
0
/*
 * hubiio_crb_error_handler
 *
 * Scans every CRB (index 0 .. IIO_NUM_CRBS-1) of the hub whose nasid is
 * hinfo->h_nasid and processes each entry that has its mark bit (b_mark)
 * set in CRB register B.
 *
 * For a marked entry the routine reads CRB registers A, C, D and E,
 * initialises an ioerror_t and records the error code in it, then:
 *   - if the entry belongs to a BTE operation, frees the CRB and hands it
 *     to bte_crb_error_handler();
 *   - otherwise stores the xtalk address / widget number (and, when a_iow
 *     is set, the widget device and TNUM) in the ioerror_t and acts on the
 *     error code.
 *
 * Parameters:
 *	hub_v	- hub vertex; passed to error_state_set(),
 *		  hub_ioerror_handler() and bte_crb_error_handler()
 *	hinfo	- hub info; supplies h_nasid and is passed to
 *		  hubiio_crb_free()
 *
 * Returns:
 *	IOERROR_UNHANDLED if error_state_set() reports
 *	ERROR_RETURN_CODE_CANNOT_SET_STATE.  The normal return path is not
 *	part of this excerpt (the text ends at the close of the scan loop).
 */
int
hubiio_crb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo)
{
	cnodeid_t	cnode;
	nasid_t		nasid;
	ii_icrb0_a_u_t		icrba;		/* II CRB Register A */
	ii_icrb0_b_u_t		icrbb;		/* II CRB Register B */
	ii_icrb0_c_u_t		icrbc;		/* II CRB Register C */
	ii_icrb0_d_u_t		icrbd;		/* II CRB Register D */
	ii_icrb0_e_u_t		icrbe;		/* II CRB Register E */
	int		i;
	int		num_errors = 0;	/* Num of errors handled */
	ioerror_t	ioerror;
	int		rc;

	nasid = hinfo->h_nasid;
	cnode = NASID_TO_COMPACT_NODEID(nasid);

	/*
	 * XXX - Add locking for any recovery actions
	 */
	/*
	 * Scan through all CRBs in the Hub, and handle the errors
	 * in any of the CRBs marked.
	 */
	for (i = 0; i < IIO_NUM_CRBS; i++) {
		/* Check this crb entry to see if it is in error. */
		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));

		/* Unmarked entries carry no error; skip them. */
		if (icrbb.b_mark == 0) {
			continue;
		}

		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));

		IOERROR_INIT(&ioerror);

		/* read other CRB error registers. */
		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));

		IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

		/* Check if this error is due to BTE operation,
		* and handle it separately.
		*/
		if (icrbd.d_bteop ||
			((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
			icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
			(icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
			icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))){

			int bte_num;

			if (icrbd.d_bteop)
				bte_num = icrbc.c_btenum;
			else /* b_initiator bit 2 gives BTE number */
				bte_num = (icrbb.b_initiator & 0x4) >> 2;

			/* The CRB is released before the BTE handler runs. */
			hubiio_crb_free(hinfo, i);

			bte_crb_error_handler(hub_v, bte_num,
					      i, &ioerror,
					      icrbd.d_bteop);
			/* Only BTE-path errors are counted in num_errors. */
			num_errors++;
			continue;
		}

		/*
		 * XXX
		 * Assuming the only other errors that would reach here are
		 * crosstalk errors.
		 * If CRB times out on a message from Xtalk, it changes
		 * the message type to CRB.
		 *
		 * If we get here due to other errors (SN0net/CRB)
		 * what's the action ?
		 */

		/*
		 * Pick out the useful fields in CRB, and
		 * tuck them away into ioerror structure.
		 */
		IOERROR_SETVALUE(&ioerror,xtalkaddr,icrba.a_addr << IIO_ICRB_ADDR_SHFT);
		IOERROR_SETVALUE(&ioerror,widgetnum,icrba.a_sidn);


		if (icrba.a_iow){
			/*
			 * XXX We shouldn't really have BRIDGE-specific code
			 * here, but alas....
			 *
			 * The BRIDGE (or XBRIDGE) sets the upper bit of TNUM
			 * to indicate a WRITE operation.  It sets the next
			 * bit to indicate an INTERRUPT operation.  The bottom
			 * 3 bits of TNUM indicate which device was responsible.
			 */
			IOERROR_SETVALUE(&ioerror,widgetdev,
					 TNUM_TO_WIDGET_DEV(icrba.a_tnum));
			/*
			* The encoding of TNUM (see comments above) is
			* different for PIC. So we'll save TNUM here and
			* deal with the differences later when we can
			* determine if we're using a Bridge or the PIC.
			*
			* XXX:  We may be able to remove saving the widgetdev
			* above and just sort it out of TNUM later.
			*/
			IOERROR_SETVALUE(&ioerror, tnum, icrba.a_tnum);

		}
		if (icrbb.b_error) {
		    /*
		     * CRB 'i' has some error. Identify the type of error,
		     * and try to handle it.
		     *
		     */
		    switch(icrbb.b_ecode) {
			case IIO_ICRB_ECODE_PERR:
			case IIO_ICRB_ECODE_WERR:
			case IIO_ICRB_ECODE_AERR:
			case IIO_ICRB_ECODE_PWERR:
			case IIO_ICRB_ECODE_TOUT:
			case IIO_ICRB_ECODE_XTERR:
			    printk("Shub II CRB %d: error %s on hub cnodeid: %d",
				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
			    /*
			     * Any sort of write error is mostly due to
			     * bad programming (Note it's not a timeout.)
			     * So, invoke hub_ioerror_handler with
			     * appropriate information.
			     */
			    IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

			    /*
			     * Go through the error bit lookup phase.
			     * If the state cannot be set, the whole scan is
			     * abandoned and the remaining CRBs are left
			     * unprocessed.
			     */
			    if (error_state_set(hub_v, ERROR_STATE_LOOKUP) ==
				    ERROR_RETURN_CODE_CANNOT_SET_STATE)
				return(IOERROR_UNHANDLED);
			    /*
			     * Two-step recovery: report the device error
			     * first and, only if that was handled, ask for
			     * the device to be re-enabled.
			     */
			    rc = hub_ioerror_handler(
				    hub_v,
				    DMA_WRITE_ERROR,
				    MODE_DEVERROR,
				    &ioerror);
			    if (rc == IOERROR_HANDLED) {
				rc = hub_ioerror_handler(
					hub_v,
					DMA_WRITE_ERROR,
					MODE_DEVREENABLE,
					&ioerror);
			    }else {
				printk("Unable to handle %s on hub %d",
					hubiio_crb_errors[icrbb.b_ecode],
					cnode);
				/* panic; */
			    }
			    /* Go to Next error */
			    print_crb_fields(i, icrba, icrbb, icrbc,
				    icrbd, icrbe);
			    hubiio_crb_free(hinfo, i);
			    continue;
			case IIO_ICRB_ECODE_PRERR:
			case IIO_ICRB_ECODE_DERR:
			    printk("Shub II CRB %d: error %s on hub : %d",
				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
			    /* panic */
			    /*
			     * NOTE(review): there is no break here, so
			     * control falls into the default case and the
			     * generic message below is printed as well -
			     * confirm this is intended.
			     */
			default:
			    printk("Shub II CRB error (code : %d) on hub : %d",
				    icrbb.b_ecode, cnode);
			    /* panic */
		    }
		} 
		/*
		 * Error is not indicated via the errcode field
		 * Check other error indications in this register.
		 */
		if (icrbb.b_xerr) {
		    printk("Shub II CRB %d: Xtalk Packet with error bit set to hub %d",
			    i, cnode);
		    /* panic */
		}
		if (icrbb.b_lnetuce) {
		    printk("Shub II CRB %d: Uncorrectable data error detected on data "
			    " from NUMAlink to node %d",
			    i, cnode);
		    /* panic */
		}
		print_crb_fields(i, icrba, icrbb, icrbc, icrbd, icrbe);





		/*
		 * NOTE(review): everything from here to the end of the loop
		 * repeats the classification above, using panic() where the
		 * first pass used printk().  The PERR/WERR/AERR/PWERR/TOUT/
		 * XTERR codes never get here because the first switch ends
		 * those cases with continue (or return), so the matching
		 * case labels below cannot be reached; every other code with
		 * b_error set ends in panic().  Looks like two revisions of
		 * the same code were concatenated - confirm before relying
		 * on either half.
		 */
		if (icrbb.b_error) {
		/* 
		 * CRB 'i' has some error. Identify the type of error,
		 * and try to handle it.
		 */
		switch(icrbb.b_ecode) {
		case IIO_ICRB_ECODE_PERR:
		case IIO_ICRB_ECODE_WERR:
		case IIO_ICRB_ECODE_AERR:
		case IIO_ICRB_ECODE_PWERR:

			printk("%s on hub cnodeid: %d",
				hubiio_crb_errors[icrbb.b_ecode], cnode);
			/*
			 * Any sort of write error is mostly due to
			 * bad programming (Note it's not a timeout.)
			 * So, invoke hub_ioerror_handler with
			 * appropriate information.
			 */
			IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

			rc = hub_ioerror_handler(
					hub_v, 
					DMA_WRITE_ERROR, 
					MODE_DEVERROR, 
					&ioerror);

                        if (rc == IOERROR_HANDLED) {
                                rc = hub_ioerror_handler(
                                        hub_v,
                                        DMA_WRITE_ERROR,
                                        MODE_DEVREENABLE,
                                        &ioerror);
                                ASSERT(rc == IOERROR_HANDLED);
                        }else {

				panic("Unable to handle %s on hub %d",
					hubiio_crb_errors[icrbb.b_ecode],
					cnode);
				/*NOTREACHED*/
			}
			/* Go to Next error */
			hubiio_crb_free(hinfo, i);
			continue;

		case IIO_ICRB_ECODE_PRERR:

                case IIO_ICRB_ECODE_TOUT:
                case IIO_ICRB_ECODE_XTERR:

		case IIO_ICRB_ECODE_DERR:
			panic("Fatal %s on hub : %d",
				hubiio_crb_errors[icrbb.b_ecode], cnode);
			/*NOTREACHED*/
		
		default:
			panic("Fatal error (code : %d) on hub : %d",
				icrbb.b_ecode, cnode);
			/*NOTREACHED*/

		}
		} 	/* if (icrbb.b_error) */	

		/*
		 * Error is not indicated via the errcode field 
		 * Check other error indications in this register.
		 */
		
		if (icrbb.b_xerr) {
			panic("Xtalk Packet with error bit set to hub %d",
				cnode);
			/*NOTREACHED*/
		}

		if (icrbb.b_lnetuce) {
			panic("Uncorrectable data error detected on data "
				" from Craylink to node %d",
				cnode);
			/*NOTREACHED*/
		}

	}	/* end of CRB scan loop; the rest of the function is not part of this excerpt */
Exemple #2
0
/*
 * hubiio_crb_error_handler
 *
 * Scans every CRB (index 0 .. IIO_NUM_CRBS-1) of the hub whose nasid is
 * hubdev_info->hdi_nasid.  For each entry with its mark bit (b_mark) set
 * in CRB register B it reads registers A, C, D and E, initialises an
 * ioerror_t with the error code and, if the entry belongs to a BTE
 * operation, frees the CRB and passes it to bte_crb_error_handler().
 *
 * Parameters:
 *	hubdev_info	- supplies hdi_nasid and is passed to
 *			  hubiio_crb_free()
 *
 * Only the BTE path is visible in this excerpt; the text ends at the
 * close of the scan loop.
 */
void hubiio_crb_error_handler(struct hubdev_info *hubdev_info)
{
	nasid_t nasid;
	ii_icrb0_a_u_t icrba;	/* II CRB Register A */
	ii_icrb0_b_u_t icrbb;	/* II CRB Register B */
	ii_icrb0_c_u_t icrbc;	/* II CRB Register C */
	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
	ii_icrb0_e_u_t icrbe;	/* II CRB Register E */
	int i;
	int num_errors = 0;	/* Num of errors handled */
	ioerror_t ioerror;

	nasid = hubdev_info->hdi_nasid;

	/*
	 * XXX - Add locking for any recovery actions
	 */
	/*
	 * Scan through all CRBs in the Hub, and handle the errors
	 * in any of the CRBs marked.
	 */
	for (i = 0; i < IIO_NUM_CRBS; i++) {
		/* Check this crb entry to see if it is in error. */
		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));

		/* Unmarked entries carry no error; skip them. */
		if (icrbb.b_mark == 0) {
			continue;
		}

		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));

		IOERROR_INIT(&ioerror);

		/* read other CRB error registers. */
		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));

		IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode);

		/* Check if this error is due to BTE operation,
		 * and handle it separately.
		 */
		if (icrbd.d_bteop ||
		    ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
		      icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
		     (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
		      icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) {

			int bte_num;

			if (icrbd.d_bteop)
				bte_num = icrbc.c_btenum;
			else	/* b_initiator bit 2 gives BTE number */
				bte_num = (icrbb.b_initiator & 0x4) >> 2;

			/* The CRB is released before the BTE handler runs. */
			hubiio_crb_free(hubdev_info, i);

			bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num,
					      i, &ioerror, icrbd.d_bteop);
			num_errors++;
			continue;
		}
	}	/* end of CRB scan loop; the rest of the function is not part of this excerpt */
Exemple #3
0
/*
 * Function:	bte_crb_error_handler
 * Purpose:	Process a CRB for a specific HUB/BTE
 * Parameters:	hub_v	- vertex of hub in HW graph
 *		btenum	- bte number on hub (0 == a, 1 == b)
 *		crbnum	- crb number being processed
 *		ioe	- not referenced by this routine
 *		bteop	- not referenced by this routine
 * Notes:
 *	Serialization is assumed to be provided at a higher level; a CRB
 *	should not be processed more than once.  The recovery steps below
 *	are order sensitive - if you change the sequence, be real sure
 *	about what you are doing.
 *
 * >>> This routine needs to be broken into two parts.  The first should
 * clean up the CRB.  The second should wait until all bte related CRBs
 * are complete and then do the error reset.
 */
void
bte_crb_error_handler(devfs_handle_t hub_v, int btenum,
		      int crbnum, ioerror_t *ioe, int bteop)
{
	hubinfo_t	hub_info;
	icrba_t		reg_a;
	icrbb_t		reg_b;
	nasid_t		nasid;
	hubreg_t	iidsr, imem, ieclr;
	hubreg_t	bte_esd_bit, bte_eclr_bit;

	hubinfo_get(hub_v, &hub_info);
	nasid = hub_info->h_nasid;

	/*
	 * The CRB completion sequence that follows is adapted from IRIX's
	 * bte_crb_error function.  Nothing documents why the crb has to
	 * complete normally before the BTE resumes normal operation, but
	 * this first step appears vital!
	 */

	/*
	 * Step 1: clear the error flag and error code in register B so
	 * that error_dump does not complain about this CRB.
	 */
	reg_b.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(crbnum));
	reg_b.b_error = 0;
	reg_b.b_ecode = 0;
	REMOTE_HUB_S(nasid, IIO_ICRB_B(crbnum), reg_b.ii_icrb0_b_regval);

	/*
	 * Step 2: point the CRB at this BTE's notification word and mark
	 * it valid.  The address field is written in shub format; the
	 * code derives it from the physical address shifted right by 3.
	 */
	reg_a.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(crbnum));
	reg_a.a_addr = TO_PHYS((u64)&nodepda->bte_if[btenum].notify) >> 3;
	reg_a.a_valid = 1;
	REMOTE_HUB_S(nasid, IIO_ICRB_A(crbnum), reg_a.ii_icrb0_a_regval);

	/* Step 3: issue a flush for this CRB and spin until it is done. */
	REMOTE_HUB_S(nasid, IIO_ICCR,
		     IIO_ICCR_PENDING | IIO_ICCR_CMD_FLUSH | crbnum);

	for (;;) {
		if (!(REMOTE_HUB_L(nasid, IIO_ICCR) & IIO_ICCR_PENDING))
			break;
	}

	/* Terminate the BTE. */
	/* >>> The other bte transfer will need to be restarted. */
	HUB_L((shubreg_t *)((nodepda->bte_if[btenum].bte_base_addr +
		       IIO_IBCT0 - IIO_IBLS0)));

	/* Pick the per-BTE bits; the W0ESD bit is set for either BTE. */
	bte_esd_bit  = (btenum == 0) ? IIO_IMEM_B0ESD : IIO_IMEM_B1ESD;
	bte_eclr_bit = (btenum == 0) ? IECLR_BTE0 : IECLR_BTE1;

	/* Both registers are read before either one is written back. */
	imem  = REMOTE_HUB_L(nasid, IIO_IMEM);
	ieclr = REMOTE_HUB_L(nasid, IIO_IECLR);
	imem  |= IIO_IMEM_W0ESD | bte_esd_bit;
	ieclr |= bte_eclr_bit;
	REMOTE_HUB_S(nasid, IIO_IMEM, imem);
	REMOTE_HUB_S(nasid, IIO_IECLR, ieclr);

	/* Clear the SENT bits and set the ENB bits in IIDSR. */
	iidsr = REMOTE_HUB_L(nasid, IIO_IIDSR);
	iidsr = (iidsr & ~IIO_IIDSR_SENT_MASK) | IIO_IIDSR_ENB_MASK;
	REMOTE_HUB_S(nasid, IIO_IIDSR, iidsr);

	bte_reset_nasid(nasid);

	/* Flag the error in this BTE's most recent notification word. */
	*nodepda->bte_if[btenum].most_rcnt_na = IBLS_ERROR;
}