/*
 * rf_State_Lock --
 *	Lock-acquisition step for the access described by `desc'.
 *
 *	Walks desc->asmap->stripeMap in list order.  For every entry it
 *	  1. requests the stripe lock through rf_AcquireStripeLock(), unless
 *	     locks are suppressed (rf_suppressLocksAndLargeWrites or
 *	     RF_DAG_SUPPRESS_LOCKS), the entry has no parityInfo, or the
 *	     entry is already marked RF_ASM_FLAGS_LOCK_TRIED;
 *	  2. when the access is a write and raidPtr->status is
 *	     rf_rs_reconstructing, calls rf_ForceOrBlockRecon() unless the
 *	     entry is already marked RF_ASM_FLAGS_FORCE_TRIED.
 *	Both helpers receive rf_ContinueRaidAccess and `desc' as callback and
 *	callback argument.  Because the *_TRIED flags are set before each
 *	attempt, a repeated call skips entries that were already attempted.
 *
 *	Returns RF_TRUE, leaving desc->state unchanged, as soon as either
 *	helper returns non-zero (presumably the access was queued and will be
 *	resumed via the callback -- confirm against the helpers).  Otherwise
 *	increments desc->state and returns RF_FALSE.
 *
 *	With RF_ACC_TRACE enabled, the elapsed time of the walk is added to
 *	tracerec->specific.user.lock_us.
 */
int
rf_State_Lock(RF_RaidAccessDesc_t *desc)
{
#if RF_ACC_TRACE > 0
	RF_AccTraceEntry_t *tracerec = &desc->tracerec;
	RF_Etimer_t timer;
#endif
	RF_Raid_t *raidPtr = desc->raidPtr;
	RF_AccessStripeMapHeader_t *mapHdr = desc->asmap;
	RF_AccessStripeMap_t *stripe;
	RF_StripeNum_t prevStripeID = -1;
	int blocked = RF_FALSE;
	int tryLock;
	int rc;

#if RF_ACC_TRACE > 0
	RF_ETIMER_START(timer);
#endif

	for (stripe = mapHdr->stripeMap; stripe != NULL; stripe = stripe->next) {
		RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));

		/* Step 1: take the stripe lock if we have not tried yet. */
		tryLock = !rf_suppressLocksAndLargeWrites &&
		    stripe->parityInfo &&
		    !(desc->flags & RF_DAG_SUPPRESS_LOCKS) &&
		    !(stripe->flags & RF_ASM_FLAGS_LOCK_TRIED);
		if (tryLock) {
			stripe->flags |= RF_ASM_FLAGS_LOCK_TRIED;

			/* Locks must be acquired in increasing stripe order. */
			RF_ASSERT(stripe->stripeID > prevStripeID);
			prevStripeID = stripe->stripeID;

			RF_INIT_LOCK_REQ_DESC(stripe->lockReqDesc, desc->type,
			    (void (*) (struct buf *)) rf_ContinueRaidAccess,
			    desc, stripe,
			    raidPtr->Layout.dataSectorsPerStripe);
			if (rf_AcquireStripeLock(raidPtr->lockTable,
			    stripe->stripeID, &stripe->lockReqDesc)) {
				blocked = RF_TRUE;
				break;
			}
		}

		/* Step 2 only applies to writes issued during reconstruction. */
		if (desc->type != RF_IO_TYPE_WRITE ||
		    raidPtr->status != rf_rs_reconstructing) {
#if RF_DEBUG_PSS > 0
			if (rf_pssDebug) {
				printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n",
				    desc->raidPtr->raidid,
				    (long) stripe->stripeID);
			}
#endif
			continue;
		}

		/* Force/block was already attempted for this entry. */
		if (stripe->flags & RF_ASM_FLAGS_FORCE_TRIED) {
#if RF_DEBUG_PSS > 0
			if (rf_pssDebug) {
				printf("raid%d: skipping force/block because already done, psid %ld\n",
				    desc->raidPtr->raidid,
				    (long) stripe->stripeID);
			}
#endif
			continue;
		}

		stripe->flags |= RF_ASM_FLAGS_FORCE_TRIED;
		rc = rf_ForceOrBlockRecon(raidPtr, stripe,
		    (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess,
		    desc);
		if (rc != 0) {
			blocked = RF_TRUE;
			break;
		}
		/* Zero return: record that recon is blocked for this entry. */
		stripe->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
	}

#if RF_ACC_TRACE > 0
	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
#endif

	/* Only advance the state machine when nothing made us wait. */
	if (!blocked)
		desc->state++;
	return (blocked);
}
void rf_CreateNonredundantDAG( RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, RF_IoType_t type ) { RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode; RF_PhysDiskAddr_t *pda = asmap->physInfo; int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); int i, n, totalNumNodes; char *name; n = asmap->numStripeUnitsAccessed; dag_h->creator = "NonredundantDAG"; RF_ASSERT(RF_IO_IS_R_OR_W(type)); switch (type) { case RF_IO_TYPE_READ: doFunc = rf_DiskReadFunc; undoFunc = rf_DiskReadUndoFunc; name = "R "; if (rf_dagDebug) printf("[Creating non-redundant read DAG]\n"); break; case RF_IO_TYPE_WRITE: doFunc = rf_DiskWriteFunc; undoFunc = rf_DiskWriteUndoFunc; name = "W "; if (rf_dagDebug) printf("[Creating non-redundant write DAG]\n"); break; default: RF_PANIC(); } /* * For reads, the dag can not commit until the block node is reached. * For writes, the dag commits immediately. */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->numSuccedents = 1; /* * Node count: * 1 block node * n data reads (or writes) * 1 commit node * 1 terminator node */ RF_ASSERT(n > 0); totalNumNodes = n + 3; RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); i = 0; diskNodes = &nodes[i]; i += n; blockNode = &nodes[i]; i += 1; commitNode = &nodes[i]; i += 1; termNode = &nodes[i]; i += 1; RF_ASSERT(i == totalNumNodes); /* Initialize nodes. 
*/ switch (type) { case RF_IO_TYPE_READ: rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList); rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); break; case RF_IO_TYPE_WRITE: rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, n, 1, 0, 0, dag_h, "Cmt", allocList); rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, n, 0, 0, dag_h, "Trm", allocList); break; default: RF_PANIC(); } for (i = 0; i < n; i++) { RF_ASSERT(pda != NULL); rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); diskNodes[i].params[0].p = pda; diskNodes[i].params[1].p = pda->bufPtr; /* Parity stripe id is not necessary. */ diskNodes[i].params[2].v = 0; diskNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); pda = pda->next; } /* * Connect nodes. */ /* Connect hdr to block node. */ RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; if (type == RF_IO_TYPE_READ) { /* Connecting a nonredundant read DAG. */ RF_ASSERT(blockNode->numSuccedents == n); RF_ASSERT(commitNode->numAntecedents == n); for (i = 0; i < n; i++) { /* Connect block node to each read node. */ RF_ASSERT(diskNodes[i].numAntecedents == 1); blockNode->succedents[i] = &diskNodes[i]; diskNodes[i].antecedents[0] = blockNode; diskNodes[i].antType[0] = rf_control; /* Connect each read node to the commit node. 
*/ RF_ASSERT(diskNodes[i].numSuccedents == 1); diskNodes[i].succedents[0] = commitNode; commitNode->antecedents[i] = &diskNodes[i]; commitNode->antType[i] = rf_control; } /* Connect the commit node to the term node. */ RF_ASSERT(commitNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); commitNode->succedents[0] = termNode; termNode->antecedents[0] = commitNode; termNode->antType[0] = rf_control; } else { /* Connecting a nonredundant write DAG. */ /* Connect the block node to the commit node. */ RF_ASSERT(blockNode->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 1); blockNode->succedents[0] = commitNode; commitNode->antecedents[0] = blockNode; commitNode->antType[0] = rf_control; RF_ASSERT(commitNode->numSuccedents == n); RF_ASSERT(termNode->numAntecedents == n); RF_ASSERT(termNode->numSuccedents == 0); for (i = 0; i < n; i++) { /* Connect the commit node to each write node. */ RF_ASSERT(diskNodes[i].numAntecedents == 1); commitNode->succedents[i] = &diskNodes[i]; diskNodes[i].antecedents[0] = commitNode; diskNodes[i].antType[0] = rf_control; /* Connect each write node to the term node. */ RF_ASSERT(diskNodes[i].numSuccedents == 1); diskNodes[i].succedents[0] = termNode; termNode->antecedents[i] = &diskNodes[i]; termNode->antType[i] = rf_control; } } }