示例#1
0
/*
 * Lock state of a RAID access.
 *
 * Walks the stripe map attached to the access descriptor and, for each
 * entry, (1) requests the stripe lock unless locking is suppressed, the
 * entry has no parity info, or the lock was already tried, and (2) for
 * writes issued while the array status is rf_rs_reconstructing, calls
 * rf_ForceOrBlockRecon() once per entry.
 *
 * rf_ContinueRaidAccess is handed to both calls as the callback, with
 * desc as its argument (presumably used to resume the access later --
 * confirm against the lock and reconstruction code).
 *
 * Returns RF_TRUE if either call reported non-zero; the walk stops at
 * that entry and desc->state is left unchanged.  Otherwise desc->state
 * is advanced and RF_FALSE is returned.  The *_TRIED flags record which
 * steps were already attempted for an entry, so they are skipped if this
 * function runs again on the same descriptor.
 */
int
rf_State_Lock(RF_RaidAccessDesc_t *desc)
{
#if RF_ACC_TRACE > 0
	RF_AccTraceEntry_t *trace = &desc->tracerec;
	RF_Etimer_t lockTimer;
#endif
	RF_Raid_t *raidPtr = desc->raidPtr;
	RF_AccessStripeMapHeader_t *mapHdr = desc->asmap;
	RF_AccessStripeMap_t *map;
	RF_StripeNum_t prevStripeID = -1;
	int     blocked = RF_FALSE;

#if RF_ACC_TRACE > 0
	RF_ETIMER_START(lockTimer);
#endif

	for (map = mapHdr->stripeMap; map != NULL; map = map->next) {
		int     wantLock;

		RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));

		/* Only go after stripe locks we have not tried for yet. */
		wantLock = !rf_suppressLocksAndLargeWrites &&
		    map->parityInfo &&
		    !(desc->flags & RF_DAG_SUPPRESS_LOCKS) &&
		    !(map->flags & RF_ASM_FLAGS_LOCK_TRIED);

		if (wantLock) {
			/* Mark the attempt before issuing the request. */
			map->flags |= RF_ASM_FLAGS_LOCK_TRIED;

			/* Lock order: stripe IDs must be strictly increasing. */
			RF_ASSERT(map->stripeID > prevStripeID);
			prevStripeID = map->stripeID;

			RF_INIT_LOCK_REQ_DESC(map->lockReqDesc, desc->type,
					      (void (*) (struct buf *)) rf_ContinueRaidAccess, desc, map,
					      raidPtr->Layout.dataSectorsPerStripe);
			if (rf_AcquireStripeLock(raidPtr->lockTable, map->stripeID,
						 &map->lockReqDesc)) {
				blocked = RF_TRUE;
				break;
			}
		}

		/* Force/block only applies to writes during reconstruction. */
		if (desc->type != RF_IO_TYPE_WRITE ||
		    raidPtr->status != rf_rs_reconstructing) {
#if RF_DEBUG_PSS > 0
			if (rf_pssDebug) {
				printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n",
				       desc->raidPtr->raidid,
				       (long) map->stripeID);
			}
#endif
			continue;
		}

		/* Never repeat a force/block attempt for the same entry. */
		if (map->flags & RF_ASM_FLAGS_FORCE_TRIED) {
#if RF_DEBUG_PSS > 0
			if (rf_pssDebug) {
				printf("raid%d: skipping force/block because already done, psid %ld\n",
				       desc->raidPtr->raidid,
				       (long) map->stripeID);
			}
#endif
			continue;
		}

		map->flags |= RF_ASM_FLAGS_FORCE_TRIED;
		if (rf_ForceOrBlockRecon(raidPtr, map,
					 (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess, desc) != 0) {
			blocked = RF_TRUE;
			break;
		}
		/* Zero result: remember that recon is blocked for this entry. */
		map->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
	}

#if RF_ACC_TRACE > 0
	/* Charge the time spent in this state to the access trace record. */
	RF_ETIMER_STOP(lockTimer);
	RF_ETIMER_EVAL(lockTimer);
	trace->specific.user.lock_us += RF_ETIMER_VAL_US(lockTimer);
#endif

	/* Advance to the next state only when nothing suspended us. */
	if (!blocked)
		desc->state++;
	return (blocked);
}
示例#2
0
/*
 * Build the DAG for a read or write that touches only data units
 * (no parity nodes are created).
 *
 * The graph uses asmap->numStripeUnitsAccessed disk nodes, one per
 * physical disk address on the asmap->physInfo list, plus a block node,
 * a commit node and a terminator node:
 *
 *   read :  hdr -> block -> { disk reads }  -> commit -> term
 *   write:  hdr -> block -> commit -> { disk writes } -> term
 *
 * All nodes come from a single array obtained via RF_CallocAndAdd() with
 * allocList.  type must be RF_IO_TYPE_READ or RF_IO_TYPE_WRITE; any
 * other value reaches RF_PANIC().  raidPtr, bp and flags are not
 * referenced by this function.
 */
void
rf_CreateNonredundantDAG(
	RF_Raid_t		*raidPtr,
	RF_AccessStripeMap_t	*asmap,
	RF_DagHeader_t		*dag_h,
	void			*bp,
	RF_RaidAccessFlags_t	 flags,
	RF_AllocListElem_t	*allocList,
	RF_IoType_t		 type
)
{
	RF_DagNode_t *nodePool, *ioNodes, *blockNode, *commitNode, *termNode;
	RF_PhysDiskAddr_t *pda = asmap->physInfo;
	int (*ioFunc) (RF_DagNode_t *), (*ioUndoFunc) (RF_DagNode_t *);
	int i, nUnits, nodeCount;
	char *label;

	nUnits = asmap->numStripeUnitsAccessed;
	dag_h->creator = "NonredundantDAG";

	/* Select the disk-node execute/undo functions and the node label. */
	RF_ASSERT(RF_IO_IS_R_OR_W(type));
	if (type == RF_IO_TYPE_READ) {
		ioFunc = rf_DiskReadFunc;
		ioUndoFunc = rf_DiskReadUndoFunc;
		label = "R  ";
		if (rf_dagDebug)
			printf("[Creating non-redundant read DAG]\n");
	} else if (type == RF_IO_TYPE_WRITE) {
		ioFunc = rf_DiskWriteFunc;
		ioUndoFunc = rf_DiskWriteUndoFunc;
		label = "W  ";
		if (rf_dagDebug)
			printf("[Creating non-redundant write DAG]\n");
	} else {
		RF_PANIC();
	}

	/*
	 * One commit node per DAG.  In the read graph it follows the disk
	 * nodes; in the write graph it precedes them.
	 */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/*
	 * Array layout: nUnits disk nodes, then the block, commit and
	 * terminator nodes, in that order.
	 */
	RF_ASSERT(nUnits > 0);
	nodeCount = nUnits + 3;
	RF_CallocAndAdd(nodePool, nodeCount, sizeof(RF_DagNode_t),
	    (RF_DagNode_t *), allocList);
	ioNodes = nodePool;
	blockNode = ioNodes + nUnits;
	commitNode = blockNode + 1;
	termNode = commitNode + 1;
	RF_ASSERT(termNode + 1 == nodePool + nodeCount);

	/*
	 * Set up the three control nodes.  The two counts after the NULL
	 * wakeup function are successors then antecedents (see the
	 * assertions in the connection code below).
	 */
	if (type == RF_IO_TYPE_READ) {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nUnits, 0, 0, 0, dag_h, "Nil",
		    allocList);
		rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, nUnits, 0, 0, dag_h, "Cmt",
		    allocList);
		rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm",
		    allocList);
	} else if (type == RF_IO_TYPE_WRITE) {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil",
		    allocList);
		rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nUnits, 1, 0, 0, dag_h, "Cmt",
		    allocList);
		rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, nUnits, 0, 0, dag_h, "Trm",
		    allocList);
	} else {
		RF_PANIC();
	}

	/* One disk node per physical disk address on the list. */
	for (i = 0; i < nUnits; i++, pda = pda->next) {
		RF_DagNode_t *dn = &ioNodes[i];

		RF_ASSERT(pda != NULL);
		rf_InitNode(dn, rf_wait, RF_FALSE, ioFunc, ioUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, label, allocList);
		dn->params[0].p = pda;
		dn->params[1].p = pda->bufPtr;
		/* No parity stripe id is needed here. */
		dn->params[2].v = 0;
		dn->params[3].v =
		    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
	}

	/*
	 * Wire the graph together.
	 */

	/* The header's only successor is the block node. */
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	if (type == RF_IO_TYPE_READ) {
		/* Read graph: block -> reads -> commit -> term. */
		RF_ASSERT(blockNode->numSuccedents == nUnits);
		RF_ASSERT(commitNode->numAntecedents == nUnits);
		for (i = 0; i < nUnits; i++) {
			RF_DagNode_t *rd = &ioNodes[i];

			/* block -> read */
			RF_ASSERT(rd->numAntecedents == 1);
			blockNode->succedents[i] = rd;
			rd->antecedents[0] = blockNode;
			rd->antType[0] = rf_control;

			/* read -> commit */
			RF_ASSERT(rd->numSuccedents == 1);
			rd->succedents[0] = commitNode;
			commitNode->antecedents[i] = rd;
			commitNode->antType[i] = rf_control;
		}

		/* commit -> term */
		RF_ASSERT(commitNode->numSuccedents == 1);
		RF_ASSERT(termNode->numAntecedents == 1);
		RF_ASSERT(termNode->numSuccedents == 0);
		commitNode->succedents[0] = termNode;
		termNode->antecedents[0] = commitNode;
		termNode->antType[0] = rf_control;
	} else {
		/* Write graph: block -> commit -> writes -> term. */

		/* block -> commit */
		RF_ASSERT(blockNode->numSuccedents == 1);
		RF_ASSERT(commitNode->numAntecedents == 1);
		blockNode->succedents[0] = commitNode;
		commitNode->antecedents[0] = blockNode;
		commitNode->antType[0] = rf_control;

		RF_ASSERT(commitNode->numSuccedents == nUnits);
		RF_ASSERT(termNode->numAntecedents == nUnits);
		RF_ASSERT(termNode->numSuccedents == 0);
		for (i = 0; i < nUnits; i++) {
			RF_DagNode_t *wr = &ioNodes[i];

			/* commit -> write */
			RF_ASSERT(wr->numAntecedents == 1);
			commitNode->succedents[i] = wr;
			wr->antecedents[0] = commitNode;
			wr->antType[0] = rf_control;

			/* write -> term */
			RF_ASSERT(wr->numSuccedents == 1);
			wr->succedents[0] = termNode;
			termNode->antecedents[i] = wr;
			termNode->antType[i] = rf_control;
		}
	}
}