Example #1
0
void
rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t *raidPtr,
				      RF_AccessStripeMap_t *asmap,
				      RF_DagHeader_t *dag_h, void *bp,
				      RF_RaidAccessFlags_t flags,
				      RF_AllocListElem_t *allocList,
				      int nfaults,
				      int (*redFunc) (RF_DagNode_t *),
				      int allowBufferRecycle)
{
	int     nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum,
	        rdnodesFaked;
	RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode;
	RF_DagNode_t *wndNodes, *rrdNodes, *xorNode, *commitNode;
	RF_DagNode_t *tmpNode, *tmpwndNode, *tmprrdNode;
	RF_SectorCount_t sectorsPerSU;
	RF_ReconUnitNum_t which_ru;
	char   *xorTargetBuf = NULL;	/* the target buffer for the XOR
					 * operation */
	char   overlappingPDAs[RF_MAXCOL];/* a temporary array of flags */
	RF_AccessStripeMapHeader_t *new_asm_h[2];
	RF_PhysDiskAddr_t *pda, *parityPDA;
	RF_StripeNum_t parityStripeID;
	RF_PhysDiskAddr_t *failedPDA;
	RF_RaidLayout_t *layoutPtr;

	layoutPtr = &(raidPtr->Layout);
	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress,
	    &which_ru);
	sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
	/* failedPDA points to the pda within the asm that targets the failed
	 * disk */
	failedPDA = asmap->failedPDAs[0];

#if RF_DEBUG_DAG
	if (rf_dagDebug)
		printf("[Creating degraded-write DAG]\n");
#endif

	RF_ASSERT(asmap->numDataFailed == 1);
	dag_h->creator = "SimpleDegradedWriteDAG";

	/*
         * Generate two ASMs identifying the surviving data
         * we need in order to recover the lost data.
         */
	/* overlappingPDAs array must be zero'd */
	memset(overlappingPDAs, 0, RF_MAXCOL);
	rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h,
	    &nXorBufs, NULL, overlappingPDAs, allocList);

	/* create all the nodes at once */
	nWndNodes = asmap->numStripeUnitsAccessed - 1;	/* no access is
							 * generated for the
							 * failed pda */

	nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
	    ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
	/*
         * XXX
         *
         * There's a bug with a complete stripe overwrite- that means 0 reads
         * of old data, and the rest of the DAG generation code doesn't like
         * that. A release is coming, and I don't wanna risk breaking a critical
         * DAG generator, so here's what I'm gonna do- if there's no read nodes,
         * I'm gonna fake there being a read node, and I'm gonna swap in a
         * no-op node in its place (to make all the link-up code happy).
         * This should be fixed at some point.  --jimz
         */
	if (nRrdNodes == 0) {
		nRrdNodes = 1;
		rdnodesFaked = 1;
	} else {
		rdnodesFaked = 0;
	}
	/* lock, unlock, xor, Wnd, Rrd, W(nfaults) */
	nNodes = 5 + nfaults + nWndNodes + nRrdNodes;

	blockNode = rf_AllocDAGNode();
	blockNode->list_next = dag_h->nodes;
	dag_h->nodes = blockNode;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	unblockNode = rf_AllocDAGNode();
	unblockNode->list_next = dag_h->nodes;
	dag_h->nodes = unblockNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	xorNode = rf_AllocDAGNode();
	xorNode->list_next = dag_h->nodes;
	dag_h->nodes = xorNode;

	wnpNode = rf_AllocDAGNode();
	wnpNode->list_next = dag_h->nodes;
	dag_h->nodes = wnpNode;

	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNodes = dag_h->nodes;

	for (i = 0; i < nRrdNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	rrdNodes = dag_h->nodes;

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		wnqNode = rf_AllocDAGNode();
		wnqNode->list_next = dag_h->nodes;
		dag_h->nodes = wnqNode;
	} else {
#endif
		wnqNode = NULL;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	}
#endif

	/* this dag can not commit until all rrd and xor Nodes have completed */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	RF_ASSERT(nRrdNodes > 0);
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
	rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1,
	    nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", allocList);

	/*
         * Fill in the Rrd nodes. If any of the rrd buffers are the same size as
         * the failed buffer, save a pointer to it so we can use it as the target
         * of the XOR. The pdas in the rrd nodes have been range-restricted, so if
         * a buffer is the same size as the failed buffer, it must also be at the
         * same alignment within the SU.
         */
	i = 0;
	tmprrdNode = rrdNodes;
	if (new_asm_h[0]) {
		for (i = 0, pda = new_asm_h[0]->stripeMap->physInfo;
		    i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
		    i++, pda = pda->next) {
			rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			tmprrdNode->params[0].p = pda;
			tmprrdNode->params[1].p = pda->bufPtr;
			tmprrdNode->params[2].v = parityStripeID;
			tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			tmprrdNode = tmprrdNode->list_next;
		}
	}
	/* i now equals the number of stripe units accessed in new_asm_h[0] */
	/* Note that for tmprrdNode, this means a continuation from above, so no need to
	   assign it anything.. */
	if (new_asm_h[1]) {
		for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
		    j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
		    j++, pda = pda->next) {
			rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			tmprrdNode->params[0].p = pda;
			tmprrdNode->params[1].p = pda->bufPtr;
			tmprrdNode->params[2].v = parityStripeID;
			tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			if (allowBufferRecycle && (pda->numSector == failedPDA->numSector))
				xorTargetBuf = pda->bufPtr;
			tmprrdNode = tmprrdNode->list_next;
		}
	}
	if (rdnodesFaked) {
		/*
	         * This is where we'll init that fake noop read node
	         * (XXX should the wakeup func be different?)
	         */
		/* note that rrdNodes will just be a single node... */
		rf_InitNode(rrdNodes, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
		    NULL, 1, 1, 0, 0, dag_h, "RrN", allocList);
	}
	/*
         * Make a PDA for the parity unit.  The parity PDA should start at
         * the same offset into the SU as the failed PDA.
         */
	/* Danner comment: I don't think this copy is really necessary. We are
	 * in one of two cases here. (1) The entire failed unit is written.
	 * Then asmap->parityInfo will describe the entire parity. (2) We are
	 * only writing a subset of the failed unit and nothing else. Then the
	 * asmap->parityInfo describes the failed unit and the copy can also
	 * be avoided. */

	parityPDA = rf_AllocPhysDiskAddr();
	parityPDA->next = dag_h->pda_cleanup_list;
	dag_h->pda_cleanup_list = parityPDA;
	parityPDA->col = asmap->parityInfo->col;
	parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
	    * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
	parityPDA->numSector = failedPDA->numSector;

	if (!xorTargetBuf) {
		xorTargetBuf = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
	}
	/* init the Wnp node */
	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
	    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList);
	wnpNode->params[0].p = parityPDA;
	wnpNode->params[1].p = xorTargetBuf;
	wnpNode->params[2].v = parityStripeID;
	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	/* fill in the Wnq Node */
	if (nfaults == 2) {
		{
			RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
			    (RF_PhysDiskAddr_t *), allocList);
			parityPDA->col = asmap->qInfo->col;
			parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU)
			    * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
			parityPDA->numSector = failedPDA->numSector;

			rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList);
			wnqNode->params[0].p = parityPDA;
			RF_MallocAndAdd(xorNode->results[1],
			    rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
			wnqNode->params[1].p = xorNode->results[1];
			wnqNode->params[2].v = parityStripeID;
			wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		}
/*
 * rf_CreateRaidCDegradedReadDAG
 *
 * Builds a four-node read DAG:
 *
 *     header -> block (Nil) -> read (Rpd | Rsd) -> commit (Cmt) -> term (Trm)
 *
 * Only the first PDA of the access (asmap->physInfo) is read, and the data
 * always lands in that PDA's buffer.  The disk address actually read is
 * asmap->parityInfo (the secondary copy) instead of the data PDA when
 *   - asmap->numDataFailed is non-zero (node is named "Rsd"), or
 *   - nothing in the stripe has failed and rf_compute_workload_shift()
 *     returns non-zero for the data PDA (node is named "Rpd").
 * Otherwise the data PDA itself is read ("Rpd").
 *
 * All four nodes come from one allocation that is registered on allocList.
 * The bp and flags arguments are not referenced by this routine.
 */
void
rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			      RF_DagHeader_t *dag_h, void *bp,
			      RF_RaidAccessFlags_t flags,
			      RF_AllocListElem_t *allocList)
{
	RF_DagNode_t *nodeArr, *readNode, *headNode, *cmtNode, *endNode;
	RF_PhysDiskAddr_t *dataPDA, *srcPDA;
	RF_StripeNum_t psID;
	RF_ReconUnitNum_t ru;
	int     fromMirror, mayShift;

	/* the read may be shifted to the other copy only if nothing failed */
	mayShift = ((asmap->numDataFailed + asmap->numParityFailed) == 0) ?
	    RF_TRUE : RF_FALSE;
	psID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &ru);

#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating RAID C degraded read DAG]\n");
	}
#endif
	dag_h->creator = "RaidCDegradedReadDAG";

	/* failed data forces the read onto the secondary copy */
	fromMirror = (asmap->numDataFailed != 0) ? RF_TRUE : RF_FALSE;

	/* one allocation, four nodes: read, block, commit, terminator */
	RF_MallocAndAdd(nodeArr, 4 * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
	readNode = nodeArr + 0;
	headNode = nodeArr + 1;
	cmtNode = nodeArr + 2;
	endNode = nodeArr + 3;

	/*
	 * A single commit node: an error before it is reached means the
	 * dag has failed and must be retried.
	 */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* block, commit and terminator nodes */
	rf_InitNode(headNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(cmtNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(endNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

	dataPDA = asmap->physInfo;
	RF_ASSERT(dataPDA != NULL);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

	/*
	 * Set up the read node and pick the disk address it reads from.
	 * rf_InitNode() runs first in both branches so that the params
	 * array exists before it is filled in below.
	 */
	if (fromMirror) {
		/* read secondary copy of data */
		rf_InitNode(readNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
		srcPDA = asmap->parityInfo;
	} else {
		rf_InitNode(readNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
		if (mayShift && rf_compute_workload_shift(raidPtr, dataPDA)) {
			/* shift this read to the next disk in line */
			srcPDA = asmap->parityInfo;
		} else {
			/* read primary copy */
			srcPDA = dataPDA;
		}
	}
	readNode->params[0].p = srcPDA;
	readNode->params[1].p = dataPDA->bufPtr;
	readNode->params[2].v = psID;
	readNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, ru);

	/* header -> block */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(headNode->numAntecedents == 0);
	dag_h->succedents[0] = headNode;

	/* block -> read */
	RF_ASSERT(headNode->numSuccedents == 1);
	RF_ASSERT(readNode->numAntecedents == 1);
	headNode->succedents[0] = readNode;
	readNode->antecedents[0] = headNode;
	readNode->antType[0] = rf_control;

	/* read -> commit */
	RF_ASSERT(readNode->numSuccedents == 1);
	RF_ASSERT(cmtNode->numAntecedents == 1);
	readNode->succedents[0] = cmtNode;
	cmtNode->antecedents[0] = readNode;
	cmtNode->antType[0] = rf_control;

	/* commit -> terminator */
	RF_ASSERT(cmtNode->numSuccedents == 1);
	RF_ASSERT(endNode->numAntecedents == 1);
	RF_ASSERT(endNode->numSuccedents == 0);
	cmtNode->succedents[0] = endNode;
	endNode->antecedents[0] = cmtNode;
	endNode->antType[0] = rf_control;
}
void
rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			 RF_DagHeader_t *dag_h, void *bp,
			 RF_RaidAccessFlags_t flags,
			 RF_AllocListElem_t *allocList,
			 const RF_RedFuncs_t *recFunc)
{
	RF_DagNode_t *rudNodes, *rrdNodes, *xorNode, *blockNode;
	RF_DagNode_t *commitNode, *rpNode, *termNode;
	RF_DagNode_t *tmpNode, *tmprudNode, *tmprrdNode;
	int     nRrdNodes, nRudNodes, nXorBufs, i;
	int     j, paramNum;
	RF_SectorCount_t sectorsPerSU;
	RF_ReconUnitNum_t which_ru;
	char    overlappingPDAs[RF_MAXCOL];/* a temporary array of flags */
	RF_AccessStripeMapHeader_t *new_asm_h[2];
	RF_PhysDiskAddr_t *pda, *parityPDA;
	RF_StripeNum_t parityStripeID;
	RF_PhysDiskAddr_t *failedPDA;
	RF_RaidLayout_t *layoutPtr;
	char   *rpBuf;

	layoutPtr = &(raidPtr->Layout);
	/* failedPDA points to the pda within the asm that targets the failed
	 * disk */
	failedPDA = asmap->failedPDAs[0];
	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
	    asmap->raidAddress, &which_ru);
	sectorsPerSU = layoutPtr->sectorsPerStripeUnit;

#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating degraded read DAG]\n");
	}
#endif
	RF_ASSERT(asmap->numDataFailed == 1);
	dag_h->creator = "DegradedReadDAG";

	/*
         * generate two ASMs identifying the surviving data we need
         * in order to recover the lost data
         */

	/* overlappingPDAs array must be zero'd */
	memset(overlappingPDAs, 0, RF_MAXCOL);
	rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs,
	    &rpBuf, overlappingPDAs, allocList);

	/*
         * create all the nodes at once
         *
         * -1 because no access is generated for the failed pda
         */
	nRudNodes = asmap->numStripeUnitsAccessed - 1;
	nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
	    ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);

	blockNode = rf_AllocDAGNode();
	blockNode->list_next = dag_h->nodes;
	dag_h->nodes = blockNode;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	xorNode = rf_AllocDAGNode();
	xorNode->list_next = dag_h->nodes;
	dag_h->nodes = xorNode;

	rpNode = rf_AllocDAGNode();
	rpNode->list_next = dag_h->nodes;
	dag_h->nodes = rpNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	for (i = 0; i < nRudNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	rudNodes = dag_h->nodes;

	for (i = 0; i < nRrdNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	rrdNodes = dag_h->nodes;

	/* initialize nodes */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	/* this dag can not commit until the commit node is reached errors
	 * prior to the commit point imply the dag has failed */
	dag_h->numSuccedents = 1;

	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
	rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc,
	    NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h,
	    recFunc->SimpleName, allocList);

	/* fill in the Rud nodes */
	tmprudNode = rudNodes;
	for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) {
		if (pda == failedPDA) {
			i--;
			continue;
		}
		rf_InitNode(tmprudNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
		    rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
		    "Rud", allocList);
		RF_ASSERT(pda);
		tmprudNode->params[0].p = pda;
		tmprudNode->params[1].p = pda->bufPtr;
		tmprudNode->params[2].v = parityStripeID;
		tmprudNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		tmprudNode = tmprudNode->list_next;
	}

	/* fill in the Rrd nodes */
	i = 0;
	tmprrdNode = rrdNodes;
	if (new_asm_h[0]) {
		for (pda = new_asm_h[0]->stripeMap->physInfo;
		    i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
		    i++, pda = pda->next) {
			rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
			    rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
			    dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			tmprrdNode->params[0].p = pda;
			tmprrdNode->params[1].p = pda->bufPtr;
			tmprrdNode->params[2].v = parityStripeID;
			tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			tmprrdNode = tmprrdNode->list_next;
		}
	}
	if (new_asm_h[1]) {
		/* tmprrdNode = rrdNodes; */ /* don't set this here -- old code was using i+j, which means
		   we need to just continue using tmprrdNode for the next 'j' elements. */
		for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
		    j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
		    j++, pda = pda->next) {
			rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
			    rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
			    dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			tmprrdNode->params[0].p = pda;
			tmprrdNode->params[1].p = pda->bufPtr;
			tmprrdNode->params[2].v = parityStripeID;
			tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			tmprrdNode = tmprrdNode->list_next;
		}
	}
	/* make a PDA for the parity unit */
	parityPDA = rf_AllocPhysDiskAddr();
	parityPDA->next = dag_h->pda_cleanup_list;
	dag_h->pda_cleanup_list = parityPDA;
	parityPDA->col = asmap->parityInfo->col;
	parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
	    * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
	parityPDA->numSector = failedPDA->numSector;

	/* initialize the Rp node */
	rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
	    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList);
	rpNode->params[0].p = parityPDA;
	rpNode->params[1].p = rpBuf;
	rpNode->params[2].v = parityStripeID;
	rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);

	/*
         * the last and nastiest step is to assign all
         * the parameters of the Xor node
         */
	paramNum = 0;
	tmprrdNode = rrdNodes;
	for (i = 0; i < nRrdNodes; i++) {
		/* all the Rrd nodes need to be xored together */
		xorNode->params[paramNum++] = tmprrdNode->params[0];
		xorNode->params[paramNum++] = tmprrdNode->params[1];
		tmprrdNode = tmprrdNode->list_next;
	}
	tmprudNode = rudNodes;
	for (i = 0; i < nRudNodes; i++) {
		/* any Rud nodes that overlap the failed access need to be
		 * xored in */
		if (overlappingPDAs[i]) {
			pda = rf_AllocPhysDiskAddr();
			memcpy((char *) pda, (char *) tmprudNode->params[0].p, sizeof(RF_PhysDiskAddr_t));
			/* add it into the pda_cleanup_list *after* the copy, TYVM */
			pda->next = dag_h->pda_cleanup_list;
			dag_h->pda_cleanup_list = pda;
			rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
			xorNode->params[paramNum++].p = pda;
			xorNode->params[paramNum++].p = pda->bufPtr;
		}
		tmprudNode = tmprudNode->list_next;
	}

	/* install parity pda as last set of params to be xor'd */
	xorNode->params[paramNum++].p = parityPDA;
	xorNode->params[paramNum++].p = rpBuf;

	/*
         * the last 2 params to the recovery xor node are
         * the failed PDA and the raidPtr
         */
	xorNode->params[paramNum++].p = failedPDA;
	xorNode->params[paramNum++].p = raidPtr;
	RF_ASSERT(paramNum == 2 * nXorBufs + 2);

	/*
         * The xor node uses results[0] as the target buffer.
         * Set pointer and zero the buffer. In the kernel, this
         * may be a user buffer in which case we have to remap it.
         */
	xorNode->results[0] = failedPDA->bufPtr;
	memset(failedPDA->bufPtr, 0, rf_RaidAddressToByte(raidPtr,
		failedPDA->numSector));

	/* connect nodes to form graph */
	/* connect the header to the block node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* connect the block node to the read nodes */
	RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes));
	RF_ASSERT(rpNode->numAntecedents == 1);
	blockNode->succedents[0] = rpNode;
	rpNode->antecedents[0] = blockNode;
	rpNode->antType[0] = rf_control;
	tmprrdNode = rrdNodes;
	for (i = 0; i < nRrdNodes; i++) {
		RF_ASSERT(tmprrdNode->numSuccedents == 1);
		blockNode->succedents[1 + i] = tmprrdNode;
		tmprrdNode->antecedents[0] = blockNode;
		tmprrdNode->antType[0] = rf_control;
		tmprrdNode = tmprrdNode->list_next;
	}
	tmprudNode = rudNodes;
	for (i = 0; i < nRudNodes; i++) {
		RF_ASSERT(tmprudNode->numSuccedents == 1);
		blockNode->succedents[1 + nRrdNodes + i] = tmprudNode;
		tmprudNode->antecedents[0] = blockNode;
		tmprudNode->antType[0] = rf_control;
		tmprudNode = tmprudNode->list_next;
	}

	/* connect the read nodes to the xor node */
	RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes));
	RF_ASSERT(rpNode->numSuccedents == 1);
	rpNode->succedents[0] = xorNode;
	xorNode->antecedents[0] = rpNode;
	xorNode->antType[0] = rf_trueData;
	tmprrdNode = rrdNodes;
	for (i = 0; i < nRrdNodes; i++) {
		RF_ASSERT(tmprrdNode->numSuccedents == 1);
		tmprrdNode->succedents[0] = xorNode;
		xorNode->antecedents[1 + i] = tmprrdNode;
		xorNode->antType[1 + i] = rf_trueData;
		tmprrdNode = tmprrdNode->list_next;
	}
	tmprudNode = rudNodes;
	for (i = 0; i < nRudNodes; i++) {
		RF_ASSERT(tmprudNode->numSuccedents == 1);
		tmprudNode->succedents[0] = xorNode;
		xorNode->antecedents[1 + nRrdNodes + i] = tmprudNode;
		xorNode->antType[1 + nRrdNodes + i] = rf_trueData;
		tmprudNode = tmprudNode->list_next;
	}

	/* connect the xor node to the commit node */
	RF_ASSERT(xorNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	xorNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = xorNode;
	commitNode->antType[0] = rf_control;

	/* connect the termNode to the commit node */
	RF_ASSERT(commitNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	commitNode->succedents[0] = termNode;
	termNode->antType[0] = rf_control;
	termNode->antecedents[0] = commitNode;
}
void
rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			     RF_DagHeader_t *dag_h, void *bp,
			     RF_RaidAccessFlags_t flags,
			     RF_AllocListElem_t *allocList,
			     int nfaults, int (*redFunc) (RF_DagNode_t *),
			     int allowBufferRecycle)
{
	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
	RF_DagNode_t *blockNode, *commitNode, *termNode;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	RF_DagNode_t *wnqNode;
#endif
	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
	RF_AccessStripeMapHeader_t *new_asm_h[2];
	RF_StripeNum_t parityStripeID;
	char   *sosBuffer, *eosBuffer;
	RF_ReconUnitNum_t which_ru;
	RF_RaidLayout_t *layoutPtr;
	RF_PhysDiskAddr_t *pda;

	layoutPtr = &(raidPtr->Layout);
	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
							asmap->raidAddress,
							&which_ru);

#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating large-write DAG]\n");
	}
#endif
	dag_h->creator = "LargeWriteDAG";

	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
	nWndNodes = asmap->numStripeUnitsAccessed;

	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNodes = dag_h->nodes;

	xorNode = rf_AllocDAGNode();
	xorNode->list_next = dag_h->nodes;
	dag_h->nodes = xorNode;

	wnpNode = rf_AllocDAGNode();
	wnpNode->list_next = dag_h->nodes;
	dag_h->nodes = wnpNode;

	blockNode = rf_AllocDAGNode();
	blockNode->list_next = dag_h->nodes;
	dag_h->nodes = blockNode;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		wnqNode = rf_AllocDAGNode();
	} else {
		wnqNode = NULL;
	}
#endif
	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
					new_asm_h, &nRodNodes, &sosBuffer,
					&eosBuffer, allocList);
	if (nRodNodes > 0) {
		for (i = 0; i < nRodNodes; i++) {
			tmpNode = rf_AllocDAGNode();
			tmpNode->list_next = dag_h->nodes;
			dag_h->nodes = tmpNode;
		}
		rodNodes = dag_h->nodes;
	} else {
		rodNodes = NULL;
	}

	/* begin node initialization */
	if (nRodNodes > 0) {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
			    dag_h, "Nil", allocList);
	} else {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
			    dag_h, "Nil", allocList);
	}

	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
		    dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
		    dag_h, "Trm", allocList);

	/* initialize the Rod nodes */
	tmpNode = rodNodes;
	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
		if (new_asm_h[asmNum]) {
			pda = new_asm_h[asmNum]->stripeMap->physInfo;
			while (pda) {
				rf_InitNode(tmpNode, rf_wait,
					    RF_FALSE, rf_DiskReadFunc,
					    rf_DiskReadUndoFunc,
					    rf_GenericWakeupFunc,
					    1, 1, 4, 0, dag_h,
					    "Rod", allocList);
				tmpNode->params[0].p = pda;
				tmpNode->params[1].p = pda->bufPtr;
				tmpNode->params[2].v = parityStripeID;
				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
				    which_ru);
				nodeNum++;
				pda = pda->next;
				tmpNode = tmpNode->list_next;
			}
		}
	}
	RF_ASSERT(nodeNum == nRodNodes);

	/* initialize the wnd nodes */
	pda = asmap->physInfo;
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0,
			    dag_h, "Wnd", allocList);
		RF_ASSERT(pda != NULL);
		tmpNode->params[0].p = pda;
		tmpNode->params[1].p = pda->bufPtr;
		tmpNode->params[2].v = parityStripeID;
		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		pda = pda->next;
		tmpNode = tmpNode->list_next;
	}

	/* initialize the redundancy node */
	if (nRodNodes > 0) {
		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
			    rf_NullNodeUndoFunc, NULL, 1,
			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
			    nfaults, dag_h, "Xr ", allocList);
	} else {
		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
			    rf_NullNodeUndoFunc, NULL, 1,
			    1, 2 * (nWndNodes + nRodNodes) + 1,
			    nfaults, dag_h, "Xr ", allocList);
	}
	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		/* pda */
		xorNode->params[2 * i + 0] = tmpNode->params[0];
		/* buf ptr */
		xorNode->params[2 * i + 1] = tmpNode->params[1];
		tmpNode = tmpNode->list_next;
	}
	tmpNode = rodNodes;
	for (i = 0; i < nRodNodes; i++) {
		/* pda */
		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
		/* buf ptr */
		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
		tmpNode = tmpNode->list_next;
	}
	/* xor node needs to get at RAID information */
	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;

	/*
         * Look for an Rod node that reads a complete SU. If none,
         * alloc a buffer to receive the parity info. Note that we
         * can't use a new data buffer because it will not have gotten
         * written when the xor occurs.  */
	if (allowBufferRecycle) {
		tmpNode = rodNodes;
		for (i = 0; i < nRodNodes; i++) {
			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
				break;
			tmpNode = tmpNode->list_next;
		}
	}
	if ((!allowBufferRecycle) || (i == nRodNodes)) {
		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
	} else {
		/* this works because the only way we get here is if
		   allowBufferRecycle is true and we went through the
		   above for loop, and exited via the break before
		   i==nRodNodes was true.  That means tmpNode will
		   still point to a valid node -- the one we want for
		   here! */
		xorNode->results[0] = tmpNode->params[1].p;
	}

	/* initialize the Wnp node */
	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
		    dag_h, "Wnp", allocList);
	wnpNode->params[0].p = asmap->parityInfo;
	wnpNode->params[1].p = xorNode->results[0];
	wnpNode->params[2].v = parityStripeID;
	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		/*
	         * We never try to recycle a buffer for the Q calculation
	         * in addition to the parity. This would cause two buffers
	         * to get smashed during the P and Q calculation, guaranteeing
	         * one would be wrong.
	         */
		RF_MallocAndAdd(xorNode->results[1],
				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
				(void *), allocList);
		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
			    1, 1, 4, 0, dag_h, "Wnq", allocList);
		wnqNode->params[0].p = asmap->qInfo;
		wnqNode->params[1].p = xorNode->results[1];
		wnqNode->params[2].v = parityStripeID;
		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		/* parityInfo must describe entire parity unit */
		RF_ASSERT(asmap->parityInfo->next == NULL);
	}
Example #5
0
/*
 * Create a DAG that performs a read on a mirrored (RAID level 1) array.
 *
 * The resulting graph is
 *
 *	hdr -> Nil (block) -> Rmir x n -> Cmt (commit) -> Trm (terminate)
 *
 * where n is asmap->numStripeUnitsAccessed.  One "Rmir" node is built per
 * entry of the asmap->physInfo list, paired position-by-position with the
 * asmap->parityInfo list; both lists must hold at least n entries.
 *
 * Parameters:
 *	raidPtr   - not referenced by this function.
 *	asmap     - supplies the unit count and the two PDA lists.
 *	dag_h     - DAG header to fill in (creator string, commit counters,
 *		    succedents[0]).
 *	bp, flags - not referenced by this function.
 *	allocList - handed to RF_CallocAndAdd and to every rf_InitNode call.
 *	readfunc  - do-function installed in every read node.
 */
void
rf_CreateMirrorReadDAG(
	RF_Raid_t		 *raidPtr,
	RF_AccessStripeMap_t	 *asmap,
	RF_DagHeader_t		 *dag_h,
	void			 *bp,
	RF_RaidAccessFlags_t	  flags,
	RF_AllocListElem_t	 *allocList,
	int			(*readfunc) (RF_DagNode_t *)
)
{
	RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode;
	RF_PhysDiskAddr_t *data_pda = asmap->physInfo;
	RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo;
	int i, n, totalNumNodes;

	n = asmap->numStripeUnitsAccessed;
	dag_h->creator = "RaidOneReadDAG";
	if (rf_dagDebug) {
		printf("[Creating RAID level 1 read DAG]\n");
	}
	/*
	 * This dag can not commit until the commit node is reached.
	 * Errors prior to the commit point imply the dag has failed.
	 */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/*
	 * Node count:
	 * n data reads
	 * 1 block node
	 * 1 commit node
	 * 1 terminator node
	 */
	RF_ASSERT(n > 0);
	totalNumNodes = n + 3;
	RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
	    (RF_DagNode_t *), allocList);

	/*
	 * Carve the single allocation into its roles: the n read nodes
	 * come first, followed by the block, commit and terminator nodes.
	 */
	i = 0;
	readNodes = &nodes[i];
	i += n;
	blockNode = &nodes[i];
	i += 1;
	commitNode = &nodes[i];
	i += 1;
	termNode = &nodes[i];
	i += 1;
	RF_ASSERT(i == totalNumNodes);

	/*
	 * Initialize nodes.  The numeric arguments are chosen so that the
	 * asserts in the connection phase below hold: block has n
	 * succedents and no antecedents, commit has 1 succedent and n
	 * antecedents, term has no succedents and 1 antecedent.
	 */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
	    rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
	    rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
	    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

	/* One read node per accessed unit; walk both PDA lists in step. */
	for (i = 0; i < n; i++) {
		RF_ASSERT(data_pda != NULL);
		RF_ASSERT(parity_pda != NULL);
		rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc,
		    rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5,
		    0, dag_h, "Rmir", allocList);
		readNodes[i].params[0].p = data_pda;
		readNodes[i].params[1].p = data_pda->bufPtr;
		/*
		 * Parity stripe id is not necessary.  It is cleared through
		 * the integer member (.v), the same member the other DAG
		 * creators in this file use for this slot; clearing it
		 * through .p could leave part of the slot unwritten where a
		 * pointer is narrower than the integer member.
		 */
		readNodes[i].params[2].v = 0;
		readNodes[i].params[3].v =
		    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
		/*
		 * Entry of asmap->parityInfo paired with this data PDA
		 * (presumably the mirror copy's address; how it is used is
		 * up to readfunc).
		 */
		readNodes[i].params[4].p = parity_pda;
		data_pda = data_pda->next;
		parity_pda = parity_pda->next;
	}

	/*
	 * Connect nodes.
	 */

	/* Connect hdr to block node. */
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* Connect block node to read nodes. */
	RF_ASSERT(blockNode->numSuccedents == n);
	for (i = 0; i < n; i++) {
		RF_ASSERT(readNodes[i].numAntecedents == 1);
		blockNode->succedents[i] = &readNodes[i];
		readNodes[i].antecedents[0] = blockNode;
		readNodes[i].antType[0] = rf_control;
	}

	/* Connect read nodes to commit node. */
	RF_ASSERT(commitNode->numAntecedents == n);
	for (i = 0; i < n; i++) {
		RF_ASSERT(readNodes[i].numSuccedents == 1);
		readNodes[i].succedents[0] = commitNode;
		commitNode->antecedents[i] = &readNodes[i];
		commitNode->antType[i] = rf_control;
	}

	/* Connect commit node to term node. */
	RF_ASSERT(commitNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	commitNode->succedents[0] = termNode;
	termNode->antecedents[0] = commitNode;
	termNode->antType[0] = rf_control;
}
Example #6
0
/*
 * Create the DAG for a read or a write on a non-redundant array.
 *
 * With n = asmap->numStripeUnitsAccessed, the two possible shapes are
 *
 *	read:	hdr -> Nil -> n disk reads  -> Cmt -> Trm
 *	write:	hdr -> Nil -> Cmt -> n disk writes -> Trm
 *
 * i.e. a read DAG reaches its commit node only after every disk read,
 * while a write DAG passes its commit node before any disk write.
 *
 * Parameters:
 *	raidPtr   - not referenced by this function.
 *	asmap     - supplies n and the physInfo list (one PDA per disk node).
 *	dag_h     - DAG header to fill in (creator string, commit counters,
 *		    succedents[0]).
 *	bp, flags - not referenced by this function.
 *	allocList - handed to RF_CallocAndAdd and to every rf_InitNode call.
 *	type      - RF_IO_TYPE_READ or RF_IO_TYPE_WRITE; anything else hits
 *		    RF_PANIC().
 */
void
rf_CreateNonredundantDAG(
	RF_Raid_t		*raidPtr,
	RF_AccessStripeMap_t	*asmap,
	RF_DagHeader_t		*dag_h,
	void			*bp,
	RF_RaidAccessFlags_t	 flags,
	RF_AllocListElem_t	*allocList,
	RF_IoType_t		 type
)
{
	RF_DagNode_t *nodeArr, *ioNodes, *blockNode, *commitNode, *termNode;
	RF_DagNode_t *ioNode;
	RF_PhysDiskAddr_t *curPda = asmap->physInfo;
	int (*ioFunc) (RF_DagNode_t *);
	int (*ioUndoFunc) (RF_DagNode_t *);
	char *ioLabel;
	int k, nUnits, nodeCount;

	nUnits = asmap->numStripeUnitsAccessed;
	dag_h->creator = "NonredundantDAG";

	/* Pick the disk do/undo pair and the node label for this access. */
	RF_ASSERT(RF_IO_IS_R_OR_W(type));
	if (type == RF_IO_TYPE_READ) {
		ioFunc = rf_DiskReadFunc;
		ioUndoFunc = rf_DiskReadUndoFunc;
		ioLabel = "R  ";
		if (rf_dagDebug)
			printf("[Creating non-redundant read DAG]\n");
	} else if (type == RF_IO_TYPE_WRITE) {
		ioFunc = rf_DiskWriteFunc;
		ioUndoFunc = rf_DiskWriteUndoFunc;
		ioLabel = "W  ";
		if (rf_dagDebug)
			printf("[Creating non-redundant write DAG]\n");
	} else {
		RF_PANIC();
	}

	/* Exactly one commit node, none passed yet, one header succedent. */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/*
	 * One allocation holds every node, laid out as:
	 *	[0 .. n-1]	disk read/write nodes
	 *	[n]		block node
	 *	[n+1]		commit node
	 *	[n+2]		terminator node
	 */
	RF_ASSERT(nUnits > 0);
	nodeCount = nUnits + 3;
	RF_CallocAndAdd(nodeArr, nodeCount, sizeof(RF_DagNode_t),
	    (RF_DagNode_t *), allocList);
	ioNodes = nodeArr;
	blockNode = ioNodes + nUnits;
	commitNode = blockNode + 1;
	termNode = commitNode + 1;
	RF_ASSERT((termNode + 1) - nodeArr == nodeCount);

	/*
	 * Initialize the three bookkeeping nodes.  Their succedent and
	 * antecedent counts differ between the two shapes because the
	 * commit node sits on opposite sides of the disk nodes.
	 */
	if (type == RF_IO_TYPE_READ) {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nUnits, 0, 0, 0, dag_h, "Nil",
		    allocList);
		rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, nUnits, 0, 0, dag_h, "Cmt",
		    allocList);
		rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm",
		    allocList);
	} else if (type == RF_IO_TYPE_WRITE) {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil",
		    allocList);
		rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nUnits, 1, 0, 0, dag_h, "Cmt",
		    allocList);
		rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, nUnits, 0, 0, dag_h, "Trm",
		    allocList);
	} else {
		RF_PANIC();
	}

	/* One disk node per PDA on the physInfo list. */
	for (k = 0, ioNode = ioNodes; k < nUnits; k++, ioNode++) {
		RF_ASSERT(curPda != NULL);
		rf_InitNode(ioNode, rf_wait, RF_FALSE, ioFunc, ioUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, ioLabel,
		    allocList);
		ioNode->params[0].p = curPda;
		ioNode->params[1].p = curPda->bufPtr;
		/* No parity stripe id is needed here. */
		ioNode->params[2].v = 0;
		ioNode->params[3].v =
		    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
		curPda = curPda->next;
	}

	/*
	 * Wire the graph together.
	 */

	/* The header always leads to the block node. */
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	if (type != RF_IO_TYPE_READ) {
		/* Write shape: block -> commit -> writes -> term. */
		RF_ASSERT(blockNode->numSuccedents == 1);
		RF_ASSERT(commitNode->numAntecedents == 1);
		blockNode->succedents[0] = commitNode;
		commitNode->antecedents[0] = blockNode;
		commitNode->antType[0] = rf_control;

		RF_ASSERT(commitNode->numSuccedents == nUnits);
		RF_ASSERT(termNode->numAntecedents == nUnits);
		RF_ASSERT(termNode->numSuccedents == 0);
		for (k = 0, ioNode = ioNodes; k < nUnits; k++, ioNode++) {
			/* commit -> write k */
			RF_ASSERT(ioNode->numAntecedents == 1);
			commitNode->succedents[k] = ioNode;
			ioNode->antecedents[0] = commitNode;
			ioNode->antType[0] = rf_control;

			/* write k -> term */
			RF_ASSERT(ioNode->numSuccedents == 1);
			ioNode->succedents[0] = termNode;
			termNode->antecedents[k] = ioNode;
			termNode->antType[k] = rf_control;
		}
	} else {
		/* Read shape: block -> reads -> commit -> term. */
		RF_ASSERT(blockNode->numSuccedents == nUnits);
		RF_ASSERT(commitNode->numAntecedents == nUnits);
		for (k = 0, ioNode = ioNodes; k < nUnits; k++, ioNode++) {
			/* block -> read k */
			RF_ASSERT(ioNode->numAntecedents == 1);
			blockNode->succedents[k] = ioNode;
			ioNode->antecedents[0] = blockNode;
			ioNode->antType[0] = rf_control;

			/* read k -> commit */
			RF_ASSERT(ioNode->numSuccedents == 1);
			ioNode->succedents[0] = commitNode;
			commitNode->antecedents[k] = ioNode;
			commitNode->antType[k] = rf_control;
		}
		/* commit -> term */
		RF_ASSERT(commitNode->numSuccedents == 1);
		RF_ASSERT(termNode->numAntecedents == 1);
		RF_ASSERT(termNode->numSuccedents == 0);
		commitNode->succedents[0] = termNode;
		termNode->antecedents[0] = commitNode;
		termNode->antType[0] = rf_control;
	}
}