/* Algorithm:
     1. Store the difference of old data and new data in the Rod buffer.
     2. then encode this buffer into the buffer which already have old 'E' information inside it,
	the result can be shown to be the new 'E' information.
     3. xor the Wnd buffer into the difference buffer to recover the  original old data.
   Here we have another alternative: to allocate a temporary buffer for storing the difference of
   old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
   take the same speed as the previous, and need more memory.
*/
int
rf_RegularONEFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
								 * where you can find
								 * e-pda */
	int     i, k;
	int     suoffset, length;
	RF_RowCol_t scol;
	char   *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	RF_PhysDiskAddr_t *pda;
#ifdef RAID_DIAGNOSTIC
	RF_PhysDiskAddr_t *EPDA =
	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);

	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
	RF_ASSERT(ESUOffset == 0);
#endif /* RAID_DIAGNOSTIC */

	RF_ETIMER_START(timer);

	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
	 * new data is stored in Rod buffer */
	for (k = 0; k < EpdaIndex; k += 2) {
		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
	}
	/* Start to encoding the buffer storing the difference of old data and
	 * new data into 'E' buffer  */
	for (i = 0; i < EpdaIndex; i += 2)
		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
									 * of E */
			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
			srcbuf = (char *) node->params[i + 1].p;
			scol = rf_EUCol(layoutPtr, pda->raidAddress);
			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
		}
	/* Recover the original old data to be used by parity encoding
	 * function in XorNode */
	for (k = 0; k < EpdaIndex; k += 2) {
		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
	}
	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
	rf_GenericWakeupFunc(node, 0);
#if 1
	return (0);		/* XXX this was missing.. GO */
#endif
}
/**************************************************************************************
 * This function is used in the case where one data unit and the parity have
 * failed (in EO_110_CreateWriteDAG).
 *
 * It passes node->results[0] to rf_DegrESubroutine(), then calls
 * rf_GenericWakeupFunc(node, 0) and returns 0.
 **************************************************************************************/
int
rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
{
	void   *ebuf = node->results[0];

	rf_DegrESubroutine(node, ebuf);
	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*******************************************************************************************
 * Used in EO_001_CreateLargeWriteDAG.
 *
 * Passes node->results[0] to rf_RegularESubroutine(), then calls
 * rf_GenericWakeupFunc(node, 0) and returns 0.
 ******************************************************************************************/
int
rf_RegularEFunc(RF_DagNode_t *node)
{
	void   *ebuf = node->results[0];

	rf_RegularESubroutine(node, ebuf);
	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/**************************************************************************************
 * When the parity and one data unit have both died, the second redundant
 * information, 'E', is used to recover the data of the dead disk.  This
 * function is used in the recovery node of EO_110_CreateReadDAG.
 *
 * node->params holds (pda, buffer) pairs followed by the failed pda and the
 * raidPtr.  The pair at index numParams - 4 is encoded as the redundant E
 * column; every other pair is encoded under the column given by rf_EUCol().
 * node->results[0] is zeroed first and then receives the encoded data.
 *
 * Returns whatever rf_GenericWakeupFunc(node, 0) returns.
 **************************************************************************************/
int
rf_RecoveryEFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	/* logical column of failed SU */
	RF_RowCol_t fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);
	int     failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	int     pair;

	/* The result buffer is cleared whether or not the DAG is enabled. */
	memset((char *) node->results[0], 0,
	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));

	if (node->dagHdr->status != rf_enable)
		return (rf_GenericWakeupFunc(node, 0));

	RF_ETIMER_START(timer);
	for (pair = 0; pair < node->numParams - 2; pair += 2) {
		RF_PhysDiskAddr_t *pda;
		RF_RowCol_t scol;	/* source logical column */
		int     suoffset;
		char   *srcbuf, *destbuf;

		/* Never encode the result buffer into itself. */
		if (node->params[pair + 1].p == node->results[0])
			continue;

		pda = (RF_PhysDiskAddr_t *) node->params[pair].p;
		if (pair != node->numParams - 4)
			scol = rf_EUCol(layoutPtr, pda->raidAddress);
		else
			scol = RF_EO_MATRIX_DIM - 2;	/* the column of
							 * redundant E */
		srcbuf = (char *) node->params[pair + 1].p;
		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
		/* Destination is offset relative to the failed unit's start. */
		destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
		rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
	}
	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->xor_us += RF_ETIMER_VAL_US(timer);

	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
}
/*
 * Encode a single write into the redundant 'E' buffer.
 *
 * Parameters read from node->params:
 *   [0] pda supplying the raid address (encoding column) and sector count
 *   [1] buffer of readDataNodes; temporarily holds the xor difference
 *   [2] pda asserted to be of type RF_PDA_TYPE_Q
 *   [3] destination buffer for the encoding
 *   [4] a pda of writeDataNodes; its numSector gives the xor length
 *   [5] buffer xored into params[1]
 *   [numParams - 1] raidPtr
 *
 * Nothing is computed unless the DAG header status is rf_enable.  Returns
 * the result of rf_GenericWakeupFunc(), which is handed the return code of
 * the first rf_bxor() call (0 when nothing was computed).
 */
int
rf_SimpleONEFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	RF_RowCol_t scol;
	char   *diffbuf, *ebuf;
	int     nbytes;
	int     retcode = 0;

	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);

	/* call wake func explicitly since no I/O in this node */
	if (node->dagHdr->status != rf_enable)
		return (rf_GenericWakeupFunc(node, retcode));

	RF_ETIMER_START(timer);

	/* this is a pda of writeDataNodes */
	nbytes = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);

	/* bxor to buffer of readDataNodes */
	retcode = rf_bxor(node->params[5].p, node->params[1].p, nbytes);

	/*
	 * Find the column of the encoding matrix that corresponds to the
	 * written column, then encode it into redundant disk 'E'.
	 */
	scol = rf_EUCol(layoutPtr, pda->raidAddress);
	diffbuf = node->params[1].p;
	ebuf = node->params[3].p;
	rf_e_encToBuf(raidPtr, scol, diffbuf, RF_EO_MATRIX_DIM - 2, ebuf, pda->numSector);

	/* Same xor again; its return value is not kept. */
	rf_bxor(node->params[5].p, node->params[1].p, nbytes);

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);

	/* call wake func explicitly since no I/O in this node */
	return (rf_GenericWakeupFunc(node, retcode));
}
/* Example no. 6 (excerpt separator, not part of the code) */
/*
 * Recovery function for a write that hits one failed data unit while a
 * second data unit is also failed (see "The situation" below).
 *
 * node->results[0] and node->results[1] are used as the P and Q pdas;
 * exactly two results are asserted.  asmap->failedPDAs[1] is asserted to be
 * NULL and asmap->numStripeUnitsAccessed is asserted to be 1.
 *
 * Always returns 0 after calling rf_GenericWakeupFunc(node, 0).
 */
int
rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t *node)
{
	/*
	 * The situation:
	 *
	 * We are doing a write that hits only one failed data unit. The other
	 * failed data unit is not being overwritten, so we need to generate
	 * it.
	 *
	 * For the moment, we assume all the nonfailed data being written is in
	 * the shadow of the failed data unit. (i.e., either a single data
	 * unit write or the entire failed stripe unit is being overwritten.)
	 *
	 * Recovery strategy: apply the recovery data to the parity and Q.
	 * Use P & Q to recover the second failed data unit in P. Zero fill
	 * Q, then apply the recovered data to P. Then apply the data being
	 * written to the failed drive. Then walk through the surviving drives,
	 * applying new data when it exists, otherwise the recovery data.
	 * Quite a mess.
	 *
	 *
	 * The params:
	 *
	 *   read pda0, read pda1, ..., read pda (numDataCol-3),
	 *   write pda0, ..., write pda (numStripeUnitAccess - numDataFailed),
	 *   failed pda, raidPtr, asmap
	 */

	int np = node->numParams;
	/* asmap is the last parameter, raidPtr the one before it. */
	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *)
	    node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
	int i;
	RF_RaidAddr_t sosAddr;
	unsigned coeff;
	RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
	int numDataCol = layoutPtr->numDataCol;
	RF_Etimer_t timer;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;

	RF_ASSERT(node->numResults == 2);
	RF_ASSERT(asmap->failedPDAs[1] == NULL);
	RF_ETIMER_START(timer);
	ppda = node->results[0];
	qpda = node->results[1];
	/* apply the recovery data (the first numDataCol - 2 params) */
	for (i = 0; i < numDataCol - 2; i++)
		rf_applyPDA(raidPtr, node->params[i].p, ppda, qpda,
		    node->dagHdr->bp);

	/* Determine the other failed data unit. */
	pda = asmap->failedPDAs[0];
	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
	    asmap->raidAddress);
	/* Need to determine the column of the other failed disk. */
	coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
	/* Compute the data unit offset within the column. */
	coeff = (coeff % raidPtr->Layout.numDataCol);
	/*
	 * Map each data column of the stripe, starting at the stripe
	 * boundary, and stop at the first column whose disk is dead and
	 * which is not column 'coeff'.  On exit i is that column, or
	 * numDataCol if none was found (caught by the assertion below).
	 */
	for (i = 0; i < numDataCol; i++) {
		npda.raidAddress = sosAddr + (i * secPerSU);
		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress,
		    &(npda.row), &(npda.col), &(npda.startSector), 0);
		/* Skip over dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
			if (i != coeff)
				break;
	}
	RF_ASSERT(i < numDataCol);
	/*
	 * Recover the data. The column we want to recover, we write over the
	 * parity. The column we don't care about, we dump in q.
	 *
	 * The smaller column index is always passed first; the two output
	 * buffers are swapped to match.
	 *
	 * NOTE(review): in both branches ppda->bufPtr sits in the output slot
	 * that lines up with 'coeff' and qpda->bufPtr in the slot that lines
	 * up with 'i' (assuming output buffers 3/4 pair with coefficients
	 * 6/7 -- rf_PQ_recover's prototype is not visible here).  The code
	 * below then treats P as holding column i; confirm against
	 * rf_PQ_recover before relying on this.
	 */
	if (coeff < i)		/* Recovering 'a'. */
		rf_PQ_recover((unsigned long *) ppda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    (unsigned long *) ppda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i);
	else			/* Recovering 'b'. */
		rf_PQ_recover((unsigned long *) ppda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    (unsigned long *) ppda->bufPtr,
		    rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff);

	/*
	 * OK. The valid data is in P. Zero fill Q, then fold P into it with
	 * rf_IncQ using column i as the coefficient.
	 */
	bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector));
	rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr,
	    rf_RaidAddressToByte(raidPtr, qpda->numSector), i);

	/* Now apply all the write data to the buffer. */
	/*
	 * Single stripe unit write case: The failed data is the only thing
	 * we are writing.
	 */
	RF_ASSERT(asmap->numStripeUnitsAccessed == 1);
	/* Dest, src, len, coeff. */
	rf_IncQ((unsigned long *) qpda->bufPtr,
	    (unsigned long *) asmap->failedPDAs[0]->bufPtr,
	    rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff);
	/* Xor the data being written into the P buffer. */
	rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr,
	    rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp);

	/* Now apply all the recovery data (same params as the first pass). */
	for (i = 0; i < numDataCol - 2; i++)
		rf_applyPDA(raidPtr, node->params[i].p, ppda, qpda,
		    node->dagHdr->bp);

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	/* The trace record is optional here. */
	if (tracerec)
		tracerec->q_us += RF_ETIMER_VAL_US(timer);

	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/* Example no. 7 (excerpt separator, not part of the code) */
/*
 * Recover a failed data unit from P and Q.
 *
 * node->params layout, as read below:
 *   [0 .. d-1]   recovery pdas, each applied to the P/Q buffers
 *   then either  P pda, Q pda                    (d = np - 4)
 *   or           P pda, P2 pda, Q pda, Q2 pda    (d = np - 6, "two" case)
 *   [np - 2]     raidPtr
 *   [np - 1]     asmap
 *
 * The "two" case (two failed pdas whose sector counts sum to less than one
 * stripe unit) is asserted unreachable, and any access with a second failed
 * pda ends in RF_PANIC(); only the single-failed-unit path does real work.
 * On that path the reconstructed column is written to
 * asmap->failedPDAs[0]->bufPtr and the other column overwrites the P buffer.
 *
 * Always returns 0 after calling rf_GenericWakeupFunc(node, 0).
 */
int
rf_PQDoubleRecoveryFunc(RF_DagNode_t *node)
{
	int np = node->numParams;
	RF_AccessStripeMap_t *asmap =
	    (RF_AccessStripeMap_t *) node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
	int d, i;
	unsigned coeff;
	RF_RaidAddr_t sosAddr;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int two = 0;
	RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
	/* Second P/Q pair; only set and used in the "two" case. */
	RF_PhysDiskAddr_t *ppda2 = NULL, *qpda2 = NULL;
	int numDataCol = layoutPtr->numDataCol;
	RF_Etimer_t timer;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;

	RF_ETIMER_START(timer);

	if (asmap->failedPDAs[1] &&
	    (asmap->failedPDAs[1]->numSector +
	     asmap->failedPDAs[0]->numSector < secPerSU)) {
		RF_ASSERT(0);
		ppda = node->params[np - 6].p;
		ppda2 = node->params[np - 5].p;
		qpda = node->params[np - 4].p;
		qpda2 = node->params[np - 3].p;
		d = (np - 6);
		two = 1;
	} else {
		ppda = node->params[np - 4].p;
		qpda = node->params[np - 3].p;
		d = (np - 4);
	}

	/*
	 * Apply every recovery pda to the P/Q pair.  In the "two" case the
	 * same pda is also applied to the second pair; the previous code
	 * passed ppda/qpda twice, leaving ppda2/qpda2 unused.
	 */
	for (i = 0; i < d; i++) {
		pda = node->params[i].p;
		rf_applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp);
		if (two)
			rf_applyPDA(raidPtr, pda, ppda2, qpda2,
			    node->dagHdr->bp);
	}

	/*
	 * Ok, we got the parity back to the point where we can recover. We
	 * now need to determine the coeff of the columns that need to be
	 * recovered. We can also only need to recover a single stripe unit.
	 */

	if (asmap->failedPDAs[1] == NULL) {	/*
						 * Only a single stripe unit
						 * to recover.
						 */
		pda = asmap->failedPDAs[0];
		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
		    asmap->raidAddress);
		/* Need to determine the column of the other failed disk. */
		coeff = rf_RaidAddressToStripeUnitID(layoutPtr,
		    pda->raidAddress);
		/* Compute the data unit offset within the column. */
		coeff = (coeff % raidPtr->Layout.numDataCol);
		/*
		 * Stop at the first data column whose disk is dead and
		 * which is not column 'coeff'; i is left at that column.
		 */
		for (i = 0; i < numDataCol; i++) {
			npda.raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr,
			    npda.raidAddress, &(npda.row), &(npda.col),
			    &(npda.startSector), 0);
			/* Skip over dead disks. */
			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col]
			    .status))
				if (i != coeff)
					break;
		}
		RF_ASSERT(i < numDataCol);
		RF_ASSERT(two == 0);
		/*
		 * Recover the data. Since we need only to recover one
		 * column, we overwrite the parity with the other one.
		 * The smaller column index is always passed first and the
		 * output buffers are ordered to match.
		 */
		if (coeff < i)	/* Recovering 'a'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    coeff, i);
		else		/* Recovering 'b'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    i, coeff);
	} else
		RF_PANIC();

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	/* The trace record is optional here. */
	if (tracerec)
		tracerec->q_us += RF_ETIMER_VAL_US(timer);
	rf_GenericWakeupFunc(node, 0);
	return (0);
}