/*
 * DAG node function: rebuild data in a P+Q stripe that has lost two units.
 *
 * node->params is read as follows (np = node->numParams):
 *	[0 .. d-1]	PDAs that are applied to the P and Q buffers
 *	next		P pda, Q pda			(d = np - 4), or
 *			P pda, P pda 2, Q pda, Q pda 2	(d = np - 6)
 *	[np - 2]	raidPtr
 *	[np - 1]	asmap
 *
 * Only the case where the access maps to a single failed PDA
 * (asmap->failedPDAs[1] == NULL) is implemented.  The "two partial failed
 * ranges" case trips RF_ASSERT(0), and any access with a second failed PDA
 * ends in RF_PANIC().
 *
 * The elapsed time is added to tracerec->q_us when a trace record exists,
 * the node is completed with rf_GenericWakeupFunc(node, 0), and 0 is
 * returned.
 */
int
rf_PQDoubleRecoveryFunc(RF_DagNode_t *node)
{
	int np = node->numParams;
	RF_AccessStripeMap_t *asmap =
	    (RF_AccessStripeMap_t *) node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
	int d, i;
	unsigned coeff;
	RF_RaidAddr_t sosAddr;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int two = 0;
	RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
	RF_PhysDiskAddr_t *ppda2 = NULL, *qpda2 = NULL;
	int numDataCol = layoutPtr->numDataCol;
	RF_Etimer_t timer;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;

	RF_ETIMER_START(timer);

	if (asmap->failedPDAs[1] &&
	    (asmap->failedPDAs[1]->numSector +
	     asmap->failedPDAs[0]->numSector < secPerSU)) {
		/*
		 * Two failed PDAs that together cover less than one stripe
		 * unit: the node carries two P/Q pairs.  This case is not
		 * expected to occur, hence the assertion.
		 */
		RF_ASSERT(0);
		ppda = node->params[np - 6].p;
		ppda2 = node->params[np - 5].p;
		qpda = node->params[np - 4].p;
		qpda2 = node->params[np - 3].p;
		d = (np - 6);
		two = 1;
	} else {
		ppda = node->params[np - 4].p;
		qpda = node->params[np - 3].p;
		d = (np - 4);
	}

	for (i = 0; i < d; i++) {
		pda = node->params[i].p;
		/* See if pda intersects a recovery pda. */
		rf_applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp);
		/*
		 * With two recovery pairs, the second pair must be offered
		 * the same pda; re-applying it to the first pair would leave
		 * ppda2/qpda2 untouched.
		 */
		if (two)
			rf_applyPDA(raidPtr, pda, ppda2, qpda2,
			    node->dagHdr->bp);
	}

	/*
	 * Ok, we got the parity back to the point where we can recover.  We
	 * now need to determine the coeff of the columns that need to be
	 * recovered.  Only the recovery of a single stripe unit is handled.
	 */
	if (asmap->failedPDAs[1] == NULL) {
		/* Only a single stripe unit to recover. */
		pda = asmap->failedPDAs[0];
		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
		    asmap->raidAddress);

		/* Need to determine the column of the other failed disk. */
		coeff = rf_RaidAddressToStripeUnitID(layoutPtr,
		    pda->raidAddress);
		/* Compute the data unit offset within the column. */
		coeff = (coeff % raidPtr->Layout.numDataCol);

		/*
		 * Walk the data columns of this stripe and stop at the first
		 * one that maps to a dead disk and is not the column of the
		 * failed pda itself; i is then the other failed column.
		 */
		for (i = 0; i < numDataCol; i++) {
			npda.raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr,
			    npda.raidAddress, &(npda.row), &(npda.col),
			    &(npda.startSector), 0);
			/* Skip over dead disks. */
			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col]
			    .status))
				if (i != coeff)
					break;
		}
		RF_ASSERT(i < numDataCol);
		RF_ASSERT(two == 0);

		/*
		 * Recover the data.  Since we need only to recover one
		 * column, we overwrite the parity with the other one.
		 * The lower-numbered column is passed first.
		 */
		if (coeff < i) {
			/* Recovering 'a'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    coeff, i);
		} else {
			/* Recovering 'b'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    i, coeff);
		}
	} else {
		/* More than one failed pda in the access: not implemented. */
		RF_PANIC();
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	if (tracerec)
		tracerec->q_us += RF_ETIMER_VAL_US(timer);

	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
 * DAG node function for a P+Q write in a doubly-degraded stripe.
 *
 * The situation:
 *
 * The write hits exactly one failed data unit.  The other failed data unit
 * is not being overwritten, so it has to be regenerated.
 *
 * For the moment, all the non-failed data being written is assumed to be in
 * the shadow of the failed data unit (i.e., either a single data unit write
 * or the entire failed stripe unit is being overwritten).
 *
 * Recovery strategy: apply the recovery data to the parity and Q.  Use
 * P & Q to recover the second failed data unit in P.  Zero fill Q, then
 * apply the recovered data to it.  Then apply the data being written to
 * the failed drive.  Then walk through the surviving drives, applying new
 * data when it exists, otherwise the recovery data.  Quite a mess.
 *
 * The params:
 *
 *	read pda0, read pda1, ..., read pda (numDataCol-3),
 *	write pda0, ..., write pda (numStripeUnitAccess - numDataFailed),
 *	failed pda, raidPtr, asmap
 *
 * The P and Q pdas are taken from node->results[0] and node->results[1].
 * Always returns 0 after waking the node with rf_GenericWakeupFunc().
 */
int
rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t *node)
{
	int np = node->numParams;
	RF_AccessStripeMap_t *asmap =
	    (RF_AccessStripeMap_t *) node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int numDataCol = layoutPtr->numDataCol;
	RF_PhysDiskAddr_t *ppda, *qpda, *failedPda, scratch;
	RF_RaidAddr_t stripeStart;
	RF_Etimer_t timer;
	unsigned failedCol;
	int k, otherCol;

	RF_ASSERT(node->numResults == 2);
	RF_ASSERT(asmap->failedPDAs[1] == NULL);
	RF_ETIMER_START(timer);

	ppda = node->results[0];
	qpda = node->results[1];

	/* First pass: apply the recovery data to P and Q. */
	for (k = 0; k < numDataCol - 2; k++) {
		rf_applyPDA(raidPtr, node->params[k].p, ppda, qpda,
		    node->dagHdr->bp);
	}

	/* Work out which column the failed pda lives in... */
	failedPda = asmap->failedPDAs[0];
	stripeStart = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
	    asmap->raidAddress);
	failedCol = rf_RaidAddressToStripeUnitID(layoutPtr,
	    failedPda->raidAddress);
	failedCol = (failedCol % layoutPtr->numDataCol);

	/*
	 * ...and then look for the other failed data unit: the first data
	 * column of this stripe that maps to a dead disk and is not
	 * failedCol.
	 */
	otherCol = 0;
	while (otherCol < numDataCol) {
		scratch.raidAddress = stripeStart + (otherCol * secPerSU);
		(raidPtr->Layout.map->MapSector) (raidPtr,
		    scratch.raidAddress, &(scratch.row), &(scratch.col),
		    &(scratch.startSector), 0);
		if ((RF_DEAD_DISK(raidPtr->Disks[scratch.row][scratch.col].status)) &&
		    (otherCol != failedCol))
			break;
		otherCol++;
	}
	RF_ASSERT(otherCol < numDataCol);

	/*
	 * Recover the data.  The column we want to recover is written over
	 * the parity; the column we don't care about is dumped in q.
	 * The lower-numbered column is always passed first.
	 */
	if (failedCol >= otherCol) {
		/* Recovering 'b'. */
		rf_PQ_recover((unsigned long *) ppda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    (unsigned long *) ppda->bufPtr,
		    rf_RaidAddressToByte(raidPtr, failedPda->numSector),
		    otherCol, failedCol);
	} else {
		/* Recovering 'a'. */
		rf_PQ_recover((unsigned long *) ppda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    (unsigned long *) ppda->bufPtr,
		    (unsigned long *) qpda->bufPtr,
		    rf_RaidAddressToByte(raidPtr, failedPda->numSector),
		    failedCol, otherCol);
	}

	/* OK.  The valid data is in P.  Zero fill Q, then inc P into it. */
	bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector));
	rf_IncQ((unsigned long *) qpda->bufPtr,
	    (unsigned long *) ppda->bufPtr,
	    rf_RaidAddressToByte(raidPtr, qpda->numSector), otherCol);

	/*
	 * Now apply all the write data to the buffer.
	 *
	 * Single stripe unit write case: the failed data is the only thing
	 * we are writing.
	 */
	RF_ASSERT(asmap->numStripeUnitsAccessed == 1);

	/* Dest, src, len, coeff. */
	rf_IncQ((unsigned long *) qpda->bufPtr,
	    (unsigned long *) asmap->failedPDAs[0]->bufPtr,
	    rf_RaidAddressToByte(raidPtr, qpda->numSector), failedCol);
	rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr,
	    rf_RaidAddressToByte(raidPtr, ppda->numSector),
	    node->dagHdr->bp);

	/* Second pass: now apply all the recovery data. */
	for (k = 0; k < numDataCol - 2; k++) {
		rf_applyPDA(raidPtr, node->params[k].p, ppda, qpda,
		    node->dagHdr->bp);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	if (tracerec)
		tracerec->q_us += RF_ETIMER_VAL_US(timer);

	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
 * Configure the RAID set described by cfgPtr into raidPtr.
 *
 * The first call (isconfigged == 0) also runs the one-time, system-wide
 * module initialisation while holding configureMutex; every call bumps
 * configureCount.  After that the per-array state in *raidPtr is set up
 * step by step, a summary is printed, and raidPtr->valid is set to 1.
 *
 * When ac is non-NULL the disks are set up through rf_AutoConfigureDisks()
 * instead of rf_ConfigureDisks()/rf_ConfigureSpareDisks().
 *
 * Returns 0 on success, ENOMEM if the cleanup list could not be created,
 * or the error from rf_AllocEmergBuffers().
 *
 * NOTE(review): DO_INIT_CONFIGURE, DO_RAID_INIT_CONFIGURE, DO_RAID_MUTEX
 * and DO_RAID_FAIL are macros defined outside this block.  They presumably
 * use the locals `rc', `raidPtr' and `cfgPtr' by name and may return from
 * this function on failure -- confirm against their definitions before
 * renaming or reordering anything here.
 */
int
rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
{
	RF_RowCol_t col;
	int rc;

	/* One-time global initialisation, serialised by configureMutex. */
	RF_LOCK_LKMGR_MUTEX(configureMutex);
	configureCount++;
	if (isconfigged == 0) {
		rf_mutex_init(&rf_printf_mutex);

		/* initialize globals */
		DO_INIT_CONFIGURE(rf_ConfigureAllocList);

		/*
		 * Yes, this does make debugging general to the whole
		 * system instead of being array specific. Bummer, drag.
		 */
		rf_ConfigureDebug(cfgPtr);
		DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
#if RF_ACC_TRACE > 0
		DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
#endif
		DO_INIT_CONFIGURE(rf_ConfigureMapModule);
		DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
		DO_INIT_CONFIGURE(rf_ConfigureCallback);
		DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
		DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
		DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
		DO_INIT_CONFIGURE(rf_ConfigureMCPair);
		DO_INIT_CONFIGURE(rf_ConfigureDAGs);
		DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
		DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
		DO_INIT_CONFIGURE(rf_ConfigureCopyback);
		DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
		DO_INIT_CONFIGURE(rf_ConfigurePSStatus);
		isconfigged = 1;
	}
	RF_UNLOCK_LKMGR_MUTEX(configureMutex);

	/* From here on everything is per-array state. */
	DO_RAID_MUTEX(&raidPtr->mutex);
	/*
	 * Set up the cleanup list.  Do this after ConfigureDebug so that
	 * the value of memDebug will be set.
	 */
	rf_MakeAllocList(raidPtr->cleanupList);
	if (raidPtr->cleanupList == NULL) {
		DO_RAID_FAIL();
		return (ENOMEM);
	}
	/* Register the cleanup list to be freed via the shutdown list. */
	rf_ShutdownCreate(&raidPtr->shutdownList, (void (*) (void *)) rf_FreeAllocList, raidPtr->cleanupList);

	raidPtr->numCol = cfgPtr->numCol;
	raidPtr->numSpare = cfgPtr->numSpare;

	raidPtr->status = rf_rs_optimal;
	raidPtr->reconControl = NULL;

	TAILQ_INIT(&(raidPtr->iodone));
	simple_lock_init(&(raidPtr->iodone_lock));

	DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
	DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);

	raidPtr->outstandingCond = 0;

	raidPtr->nAccOutstanding = 0;
	raidPtr->waitShutdown = 0;

	DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);

	raidPtr->waitForReconCond = 0;

	if (ac!=NULL) {
		/*
		 * We have an AutoConfig structure.  Don't do the normal
		 * disk configuration; call the auto config code instead.
		 * Its result, if any, is not examined here.
		 */
		rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
	} else {
		DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
		DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
	}
	/*
	 * Do this after ConfigureDisks & ConfigureSpareDisks to be sure
	 * the dev no. is set.
	 */
	DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);

	DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);

	/* Initialize per-RAID PSS bits */
	rf_InitPSStatus(raidPtr);

#if RF_INCLUDE_CHAINDECLUSTER > 0
	/* Clear the per-column request history. */
	for (col = 0; col < raidPtr->numCol; col++) {
		/*
		 * XXX better distribution
		 */
		raidPtr->hist_diskreq[col] = 0;
	}
#endif
	raidPtr->numNewFailures = 0;
	raidPtr->copyback_in_progress = 0;
	raidPtr->parity_rewrite_in_progress = 0;
	raidPtr->adding_hot_spare = 0;
	raidPtr->recon_in_progress = 0;
	raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;

	/*
	 * autoconfigure and root_partition will actually get filled in
	 * after the config is done.
	 */
	raidPtr->autoconfigure = 0;
	raidPtr->root_partition = 0;
	raidPtr->last_unit = raidPtr->raidid;
	raidPtr->config_order = 0;

	if (rf_keepAccTotals) {
		raidPtr->keep_acc_totals = 1;
	}

	/* Allocate a bunch of buffers to be used in low-memory conditions */
	raidPtr->iobuf = NULL;

	rc = rf_AllocEmergBuffers(raidPtr);
	if (rc) {
		printf("raid%d: Unable to allocate emergency buffers.\n", raidPtr->raidid);
		DO_RAID_FAIL();
		return(rc);
	}

	/* Set up parity map stuff, if applicable. */
#ifndef RF_NO_PARITY_MAP
	rf_paritymap_attach(raidPtr, cfgPtr->force);
#endif

	raidPtr->valid = 1;

	/* Report the layout name, each component (flagging dead ones), and size. */
	printf("raid%d: %s\n", raidPtr->raidid, raidPtr->Layout.map->configName);
	printf("raid%d: Components:", raidPtr->raidid);

	for (col = 0; col < raidPtr->numCol; col++) {
		printf(" %s", raidPtr->Disks[col].devname);
		if (RF_DEAD_DISK(raidPtr->Disks[col].status)) {
			printf("[**FAILED**]");
		}
	}
	printf("\n");
	/*
	 * The MB figure divides by 1024 before multiplying by the sector
	 * size, so it is truncated rather than rounded.
	 */
	printf("raid%d: Total Sectors: %" PRIu64 " (%" PRIu64 " MB)\n", raidPtr->raidid, raidPtr->totalSectors, (raidPtr->totalSectors / 1024 * (1 << raidPtr->logBytesPerSector) / 1024));

	return (0);
}