示例#1
0
void hubiio_crb_error_handler(struct hubdev_info *hubdev_info)
{
	nasid_t nasid;
	ii_icrb0_a_u_t icrba;	/* II CRB Register A */
	ii_icrb0_b_u_t icrbb;	/* II CRB Register B */
	ii_icrb0_c_u_t icrbc;	/* II CRB Register C */
	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
	ii_icrb0_e_u_t icrbe;	/* II CRB Register D */
	int i;
	int num_errors = 0;	/* Num of errors handled */
	ioerror_t ioerror;

	nasid = hubdev_info->hdi_nasid;

	/*
	 * XXX - Add locking for any recovery actions
	 */
	/*
	 * Scan through all CRBs in the Hub, and handle the errors
	 * in any of the CRBs marked.
	 */
	for (i = 0; i < IIO_NUM_CRBS; i++) {
		/* Check this crb entry to see if it is in error. */
		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));

		if (icrbb.b_mark == 0) {
			continue;
		}

		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));

		IOERROR_INIT(&ioerror);

		/* read other CRB error registers. */
		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));

		IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode);

		/* Check if this error is due to BTE operation,
		 * and handle it separately.
		 */
		if (icrbd.d_bteop ||
		    ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
		      icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
		     (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
		      icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) {

			int bte_num;

			if (icrbd.d_bteop)
				bte_num = icrbc.c_btenum;
			else	/* b_initiator bit 2 gives BTE number */
				bte_num = (icrbb.b_initiator & 0x4) >> 2;

			hubiio_crb_free(hubdev_info, i);

			bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num,
					      i, &ioerror, icrbd.d_bteop);
			num_errors++;
			continue;
		}
	}
示例#2
0
int
hubiio_crb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo)
{
	cnodeid_t	cnode;
	nasid_t		nasid;
	ii_icrb0_a_u_t		icrba;		/* II CRB Register A */
	ii_icrb0_b_u_t		icrbb;		/* II CRB Register B */
	ii_icrb0_c_u_t		icrbc;		/* II CRB Register C */
	ii_icrb0_d_u_t		icrbd;		/* II CRB Register D */
	ii_icrb0_e_u_t		icrbe;		/* II CRB Register D */
	int		i;
	int		num_errors = 0;	/* Num of errors handled */
	ioerror_t	ioerror;
	int		rc;

	nasid = hinfo->h_nasid;
	cnode = NASID_TO_COMPACT_NODEID(nasid);

	/*
	 * XXX - Add locking for any recovery actions
	 */
	/*
	 * Scan through all CRBs in the Hub, and handle the errors
	 * in any of the CRBs marked.
	 */
	for (i = 0; i < IIO_NUM_CRBS; i++) {
		/* Check this crb entry to see if it is in error. */
		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));

		if (icrbb.b_mark == 0) {
			continue;
		}

		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));

		IOERROR_INIT(&ioerror);

		/* read other CRB error registers. */
		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));

		IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

		/* Check if this error is due to BTE operation,
		* and handle it separately.
		*/
		if (icrbd.d_bteop ||
			((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
			icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
			(icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
			icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))){

			int bte_num;

			if (icrbd.d_bteop)
				bte_num = icrbc.c_btenum;
			else /* b_initiator bit 2 gives BTE number */
				bte_num = (icrbb.b_initiator & 0x4) >> 2;

			hubiio_crb_free(hinfo, i);

			bte_crb_error_handler(hub_v, bte_num,
					      i, &ioerror,
					      icrbd.d_bteop);
			num_errors++;
			continue;
		}

		/*
		 * XXX
		 * Assuming the only other error that would reach here is
		 * crosstalk errors. 
		 * If CRB times out on a message from Xtalk, it changes 
		 * the message type to CRB. 
		 *
		 * If we get here due to other errors (SN0net/CRB)
		 * what's the action ?
		 */

		/*
		 * Pick out the useful fields in CRB, and
		 * tuck them away into ioerror structure.
		 */
		IOERROR_SETVALUE(&ioerror,xtalkaddr,icrba.a_addr << IIO_ICRB_ADDR_SHFT);
		IOERROR_SETVALUE(&ioerror,widgetnum,icrba.a_sidn);


		if (icrba.a_iow){
			/*
			 * XXX We shouldn't really have BRIDGE-specific code
			 * here, but alas....
			 *
			 * The BRIDGE (or XBRIDGE) sets the upper bit of TNUM
			 * to indicate a WRITE operation.  It sets the next
			 * bit to indicate an INTERRUPT operation.  The bottom
			 * 3 bits of TNUM indicate which device was responsible.
			 */
			IOERROR_SETVALUE(&ioerror,widgetdev,
					 TNUM_TO_WIDGET_DEV(icrba.a_tnum));
			/*
			* The encoding of TNUM (see comments above) is
			* different for PIC. So we'll save TNUM here and
			* deal with the differences later when we can
			* determine if we're using a Bridge or the PIC.
			*
			* XXX:  We may be able to remove saving the widgetdev
			* above and just sort it out of TNUM later.
			*/
			IOERROR_SETVALUE(&ioerror, tnum, icrba.a_tnum);

		}
		if (icrbb.b_error) {
		    /*
		     * CRB 'i' has some error. Identify the type of error,
		     * and try to handle it.
		     *
		     */
		    switch(icrbb.b_ecode) {
			case IIO_ICRB_ECODE_PERR:
			case IIO_ICRB_ECODE_WERR:
			case IIO_ICRB_ECODE_AERR:
			case IIO_ICRB_ECODE_PWERR:
			case IIO_ICRB_ECODE_TOUT:
			case IIO_ICRB_ECODE_XTERR:
			    printk("Shub II CRB %d: error %s on hub cnodeid: %d",
				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
			    /*
			     * Any sort of write error is mostly due
			     * bad programming (Note it's not a timeout.)
			     * So, invoke hub_iio_error_handler with
			     * appropriate information.
			     */
			    IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

			    /* Go through the error bit lookup phase */
			    if (error_state_set(hub_v, ERROR_STATE_LOOKUP) ==
				    ERROR_RETURN_CODE_CANNOT_SET_STATE)
				return(IOERROR_UNHANDLED);
			    rc = hub_ioerror_handler(
				    hub_v,
				    DMA_WRITE_ERROR,
				    MODE_DEVERROR,
				    &ioerror);
			    if (rc == IOERROR_HANDLED) {
				rc = hub_ioerror_handler(
					hub_v,
					DMA_WRITE_ERROR,
					MODE_DEVREENABLE,
					&ioerror);
			    }else {
				printk("Unable to handle %s on hub %d",
					hubiio_crb_errors[icrbb.b_ecode],
					cnode);
				/* panic; */
			    }
			    /* Go to Next error */
			    print_crb_fields(i, icrba, icrbb, icrbc,
				    icrbd, icrbe);
			    hubiio_crb_free(hinfo, i);
			    continue;
			case IIO_ICRB_ECODE_PRERR:
			case IIO_ICRB_ECODE_DERR:
			    printk("Shub II CRB %d: error %s on hub : %d",
				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
			    /* panic */
			default:
			    printk("Shub II CRB error (code : %d) on hub : %d",
				    icrbb.b_ecode, cnode);
			    /* panic */
		    }
		} 
		/*
		 * Error is not indicated via the errcode field
		 * Check other error indications in this register.
		 */
		if (icrbb.b_xerr) {
		    printk("Shub II CRB %d: Xtalk Packet with error bit set to hub %d",
			    i, cnode);
		    /* panic */
		}
		if (icrbb.b_lnetuce) {
		    printk("Shub II CRB %d: Uncorrectable data error detected on data "
			    " from NUMAlink to node %d",
			    i, cnode);
		    /* panic */
		}
		print_crb_fields(i, icrba, icrbb, icrbc, icrbd, icrbe);





		if (icrbb.b_error) {
		/* 
		 * CRB 'i' has some error. Identify the type of error,
		 * and try to handle it.
		 */
		switch(icrbb.b_ecode) {
		case IIO_ICRB_ECODE_PERR:
		case IIO_ICRB_ECODE_WERR:
		case IIO_ICRB_ECODE_AERR:
		case IIO_ICRB_ECODE_PWERR:

			printk("%s on hub cnodeid: %d",
				hubiio_crb_errors[icrbb.b_ecode], cnode);
			/*
			 * Any sort of write error is mostly due
			 * bad programming (Note it's not a timeout.)
			 * So, invoke hub_iio_error_handler with
			 * appropriate information.
			 */
			IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);

			rc = hub_ioerror_handler(
					hub_v, 
					DMA_WRITE_ERROR, 
					MODE_DEVERROR, 
					&ioerror);

                        if (rc == IOERROR_HANDLED) {
                                rc = hub_ioerror_handler(
                                        hub_v,
                                        DMA_WRITE_ERROR,
                                        MODE_DEVREENABLE,
                                        &ioerror);
                                ASSERT(rc == IOERROR_HANDLED);
                        }else {

				panic("Unable to handle %s on hub %d",
					hubiio_crb_errors[icrbb.b_ecode],
					cnode);
				/*NOTREACHED*/
			}
			/* Go to Next error */
			hubiio_crb_free(hinfo, i);
			continue;

		case IIO_ICRB_ECODE_PRERR:

                case IIO_ICRB_ECODE_TOUT:
                case IIO_ICRB_ECODE_XTERR:

		case IIO_ICRB_ECODE_DERR:
			panic("Fatal %s on hub : %d",
				hubiio_crb_errors[icrbb.b_ecode], cnode);
			/*NOTREACHED*/
		
		default:
			panic("Fatal error (code : %d) on hub : %d",
				icrbb.b_ecode, cnode);
			/*NOTREACHED*/

		}
		} 	/* if (icrbb.b_error) */	

		/*
		 * Error is not indicated via the errcode field 
		 * Check other error indications in this register.
		 */
		
		if (icrbb.b_xerr) {
			panic("Xtalk Packet with error bit set to hub %d",
				cnode);
			/*NOTREACHED*/
		}

		if (icrbb.b_lnetuce) {
			panic("Uncorrectable data error detected on data "
				" from Craylink to node %d",
				cnode);
			/*NOTREACHED*/
		}

	}