示例#1
0
/*
 * DAG node function: rebuild one failed data unit of a stripe from the
 * P and Q buffers when a second disk in the stripe is dead as well.
 *
 * Parameter layout, as read from node->params[] below:
 *
 *   params[0 .. d-1]	PDAs whose data is applied to the recovery buffers
 *   params[d ..]	the recovery PDAs: ppda, qpda (single fragment), or
 *			ppda, ppda2, qpda, qpda2 (two fragments)
 *   params[np - 2]	raidPtr
 *   params[np - 1]	asmap
 *
 * The two-fragment layout is selected when asmap->failedPDAs[1] is set and
 * the two failed PDAs together cover less than one stripe unit.  That path
 * is not supported: it trips RF_ASSERT(0) and later reaches RF_PANIC().
 *
 * On the supported path the node is completed through
 * rf_GenericWakeupFunc(node, 0) and 0 is returned.  Time spent here is
 * added to tracerec->q_us when a trace record is attached.
 */
int
rf_PQDoubleRecoveryFunc(RF_DagNode_t *node)
{
	int np = node->numParams;
	RF_AccessStripeMap_t *asmap =
	    (RF_AccessStripeMap_t *) node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
	int d, i;
	unsigned coeff;
	RF_RaidAddr_t sosAddr;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int two = 0;
	RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
	RF_PhysDiskAddr_t *ppda2 = NULL, *qpda2 = NULL;
	int numDataCol = layoutPtr->numDataCol;
	RF_Etimer_t timer;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;

	RF_ETIMER_START(timer);

	if (asmap->failedPDAs[1] &&
	    (asmap->failedPDAs[1]->numSector +
	     asmap->failedPDAs[0]->numSector < secPerSU)) {
		/* Two recovery fragments: not supported. */
		RF_ASSERT(0);
		ppda = node->params[np - 6].p;
		ppda2 = node->params[np - 5].p;
		qpda = node->params[np - 4].p;
		qpda2 = node->params[np - 3].p;
		d = (np - 6);
		two = 1;
	} else {
		ppda = node->params[np - 4].p;
		qpda = node->params[np - 3].p;
		d = (np - 4);
	}

	/*
	 * Fold every read PDA into the recovery buffers.  With two recovery
	 * fragments each PDA is offered to both pairs; the second call must
	 * target ppda2/qpda2, otherwise the first pair would simply receive
	 * the same data twice and the second pair would never be updated.
	 */
	for (i = 0; i < d; i++) {
		pda = node->params[i].p;
		rf_applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp);
		if (two)
			rf_applyPDA(raidPtr, pda, ppda2, qpda2,
			    node->dagHdr->bp);
	}

	/*
	 * Ok, we got the parity back to the point where we can recover. We
	 * now need to determine the coeff of the columns that need to be
	 * recovered. We can also only need to recover a single stripe unit.
	 */

	if (asmap->failedPDAs[1] == NULL) {
		/* Only a single stripe unit to recover. */
		pda = asmap->failedPDAs[0];
		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
		    asmap->raidAddress);
		/* Column index, within the stripe, of the unit to rebuild. */
		coeff = rf_RaidAddressToStripeUnitID(layoutPtr,
		    pda->raidAddress);
		coeff = (coeff % raidPtr->Layout.numDataCol);
		/*
		 * Find the column of the other failed disk: walk the data
		 * columns and stop at the first dead disk that is not the
		 * column we are rebuilding.
		 */
		for (i = 0; i < numDataCol; i++) {
			npda.raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr,
			    npda.raidAddress, &(npda.row), &(npda.col),
			    &(npda.startSector), 0);
			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col]
			    .status))
				if (i != coeff)
					break;
		}
		RF_ASSERT(i < numDataCol);
		RF_ASSERT(two == 0);
		/*
		 * Recover the data. Since we need only to recover one
		 * column, we overwrite the parity with the other one.
		 * The lower column index is always passed first.
		 */
		if (coeff < i)	/* Recovering 'a'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    coeff, i);
		else		/* Recovering 'b'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    i, coeff);
	} else {
		/* Recovering two failed PDAs is not implemented. */
		RF_PANIC();
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	if (tracerec)
		tracerec->q_us += RF_ETIMER_VAL_US(timer);
	rf_GenericWakeupFunc(node, 0);
	return (0);
}
示例#2
0
int
rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t *node)
{
	/*
	 * Scenario:
	 *
	 * A write touches exactly one failed data unit.  A second data unit
	 * in the stripe has failed too but is not being overwritten, so its
	 * contents have to be regenerated.
	 *
	 * For now all surviving data being written is assumed to lie in the
	 * shadow of the failed data unit (a single data unit write, or a
	 * rewrite of the whole failed stripe unit).
	 *
	 * Strategy: fold the recovery data into P and Q.  Use P and Q to
	 * rebuild the second failed data unit in P.  Zero Q and fold the
	 * rebuilt data into it, then fold in the data being written to the
	 * failed drive.  Finally walk the surviving drives, applying new
	 * data where it exists, otherwise the recovery data.
	 *
	 * Parameters:
	 *
	 *   read pda0, read pda1, ..., read pda (numDataCol-3),
	 *   write pda0, ..., write pda (numStripeUnitAccess - numDataFailed),
	 *   failed pda, raidPtr, asmap
	 *
	 * Results: results[0] is the P PDA, results[1] is the Q PDA.
	 */

	int np = node->numParams;
	RF_AccessStripeMap_t *asmap =
	    (RF_AccessStripeMap_t *) node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
	RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int numDataCol = layoutPtr->numDataCol;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_PhysDiskAddr_t *parityPda, *qPda, *failedPda, probe;
	RF_RaidAddr_t stripeStart;
	unsigned failedCol;
	int k, otherCol;
	RF_Etimer_t timer;

	RF_ASSERT(node->numResults == 2);
	RF_ASSERT(asmap->failedPDAs[1] == NULL);
	RF_ETIMER_START(timer);

	parityPda = node->results[0];
	qPda = node->results[1];

	/* First pass: fold the recovery data into P and Q. */
	for (k = 0; k < numDataCol - 2; k++) {
		rf_applyPDA(raidPtr, node->params[k].p, parityPda, qPda,
		    node->dagHdr->bp);
	}

	/* Column index, within the stripe, of the failed unit being written. */
	failedPda = asmap->failedPDAs[0];
	stripeStart = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
	    asmap->raidAddress);
	failedCol = rf_RaidAddressToStripeUnitID(layoutPtr,
	    failedPda->raidAddress);
	failedCol = (failedCol % raidPtr->Layout.numDataCol);

	/*
	 * Locate the other failed column: the first data column that maps to
	 * a dead disk and is not the column being written.
	 */
	otherCol = 0;
	while (otherCol < numDataCol) {
		probe.raidAddress = stripeStart + (otherCol * secPerSU);
		(raidPtr->Layout.map->MapSector) (raidPtr, probe.raidAddress,
		    &(probe.row), &(probe.col), &(probe.startSector), 0);
		if (RF_DEAD_DISK(raidPtr->Disks[probe.row][probe.col].status) &&
		    otherCol != failedCol)
			break;
		otherCol++;
	}
	RF_ASSERT(otherCol < numDataCol);

	/*
	 * Rebuild.  The column we want ends up over the parity buffer; the
	 * column we do not care about is dumped into the Q buffer.  The
	 * lower column index is always passed first.
	 */
	if (failedCol < otherCol) {
		/* Recovering 'a'. */
		rf_PQ_recover((unsigned long *) parityPda->bufPtr,
		    (unsigned long *) qPda->bufPtr,
		    (unsigned long *) parityPda->bufPtr,
		    (unsigned long *) qPda->bufPtr,
		    rf_RaidAddressToByte(raidPtr, failedPda->numSector),
		    failedCol, otherCol);
	} else {
		/* Recovering 'b'. */
		rf_PQ_recover((unsigned long *) parityPda->bufPtr,
		    (unsigned long *) qPda->bufPtr,
		    (unsigned long *) qPda->bufPtr,
		    (unsigned long *) parityPda->bufPtr,
		    rf_RaidAddressToByte(raidPtr, failedPda->numSector),
		    otherCol, failedCol);
	}

	/* The valid data now sits in P.  Clear Q and fold P into it. */
	bzero(qPda->bufPtr, rf_RaidAddressToByte(raidPtr, qPda->numSector));
	rf_IncQ((unsigned long *) qPda->bufPtr,
	    (unsigned long *) parityPda->bufPtr,
	    rf_RaidAddressToByte(raidPtr, qPda->numSector), otherCol);

	/*
	 * Fold in the write data.  Only the single stripe unit write case is
	 * handled: the failed data is the only thing being written.
	 */
	RF_ASSERT(asmap->numStripeUnitsAccessed == 1);
	/* Arguments are: dest, src, len, coeff. */
	rf_IncQ((unsigned long *) qPda->bufPtr,
	    (unsigned long *) asmap->failedPDAs[0]->bufPtr,
	    rf_RaidAddressToByte(raidPtr, qPda->numSector), failedCol);
	rf_bxor(asmap->failedPDAs[0]->bufPtr, parityPda->bufPtr,
	    rf_RaidAddressToByte(raidPtr, parityPda->numSector),
	    node->dagHdr->bp);

	/* Second pass: fold the recovery data in again. */
	for (k = 0; k < numDataCol - 2; k++) {
		rf_applyPDA(raidPtr, node->params[k].p, parityPda, qPda,
		    node->dagHdr->bp);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	if (tracerec)
		tracerec->q_us += RF_ETIMER_VAL_US(timer);

	rf_GenericWakeupFunc(node, 0);
	return (0);
}
示例#3
0
/*
 * Set up the array described by cfgPtr in raidPtr and mark it valid.
 *
 * raidPtr	array structure to fill in
 * cfgPtr	configuration; numCol, numSpare, maxOutstandingDiskReqs and
 *		(unless RF_NO_PARITY_MAP is defined) force are read here, and
 *		the whole structure is handed to rf_ConfigureDebug()
 * ac		if non-NULL, rf_AutoConfigureDisks() is used in place of
 *		rf_ConfigureDisks()/rf_ConfigureSpareDisks()
 *
 * The first call (isconfigged == 0) also runs the one-time global
 * initializers while holding configureMutex.
 *
 * Returns 0 on success, ENOMEM if no cleanup list could be made, or the
 * error from rf_AllocEmergBuffers().
 * NOTE(review): the DO_INIT_CONFIGURE / DO_RAID_INIT_CONFIGURE /
 * DO_RAID_MUTEX macros are defined outside this block; they presumably
 * assign the local 'rc' and may return early on failure -- confirm against
 * their definitions before reordering anything here.
 */
int
rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
{
	RF_RowCol_t col;
	int rc;

	/* Count this configuration and do one-time global setup if needed. */
	RF_LOCK_LKMGR_MUTEX(configureMutex);
	configureCount++;
	if (isconfigged == 0) {
		rf_mutex_init(&rf_printf_mutex);

		/* initialize globals */

		DO_INIT_CONFIGURE(rf_ConfigureAllocList);

		/*
		 * Yes, this does make debugging general to the whole
		 * system instead of being array specific. Bummer, drag.
		 */
		rf_ConfigureDebug(cfgPtr);
		DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
#if RF_ACC_TRACE > 0
		DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
#endif
		DO_INIT_CONFIGURE(rf_ConfigureMapModule);
		DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
		DO_INIT_CONFIGURE(rf_ConfigureCallback);
		DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
		DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
		DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
		DO_INIT_CONFIGURE(rf_ConfigureMCPair);
		DO_INIT_CONFIGURE(rf_ConfigureDAGs);
		DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
		DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
		DO_INIT_CONFIGURE(rf_ConfigureCopyback);
		DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
		DO_INIT_CONFIGURE(rf_ConfigurePSStatus);
		/* Later calls skip the global initializers above. */
		isconfigged = 1;
	}
	RF_UNLOCK_LKMGR_MUTEX(configureMutex);

	/* From here on everything is per-array state. */
	DO_RAID_MUTEX(&raidPtr->mutex);
	/* set up the cleanup list.  Do this after ConfigureDebug so that
	 * value of memDebug will be set */

	rf_MakeAllocList(raidPtr->cleanupList);
	if (raidPtr->cleanupList == NULL) {
		DO_RAID_FAIL();
		return (ENOMEM);
	}
	/* Register rf_FreeAllocList on the shutdown list for cleanupList. */
	rf_ShutdownCreate(&raidPtr->shutdownList,
			  (void (*) (void *)) rf_FreeAllocList,
			  raidPtr->cleanupList);

	raidPtr->numCol = cfgPtr->numCol;
	raidPtr->numSpare = cfgPtr->numSpare;

	raidPtr->status = rf_rs_optimal;
	raidPtr->reconControl = NULL;

	TAILQ_INIT(&(raidPtr->iodone));
	simple_lock_init(&(raidPtr->iodone_lock));

	DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
	DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);

	raidPtr->outstandingCond = 0;

	raidPtr->nAccOutstanding = 0;
	raidPtr->waitShutdown = 0;

	DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);

	raidPtr->waitForReconCond = 0;

	if (ac!=NULL) {
		/* We have an AutoConfig structure..  Don't do the
		   normal disk configuration... call the auto config
		   stuff */
		/* NOTE(review): any return value of this call is ignored. */
		rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
	} else {
		DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
		DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
	}
	/* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev
	 * no. is set */
	DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);

	DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);

	/* Initialize per-RAID PSS bits */
	rf_InitPSStatus(raidPtr);

#if RF_INCLUDE_CHAINDECLUSTER > 0
	/* Zero the per-column hist_diskreq counters. */
	for (col = 0; col < raidPtr->numCol; col++) {
		/*
		 * XXX better distribution
		 */
		raidPtr->hist_diskreq[col] = 0;
	}
#endif
	/* Clear the failure counter and the in-progress flags. */
	raidPtr->numNewFailures = 0;
	raidPtr->copyback_in_progress = 0;
	raidPtr->parity_rewrite_in_progress = 0;
	raidPtr->adding_hot_spare = 0;
	raidPtr->recon_in_progress = 0;
	raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;

	/* autoconfigure and root_partition will actually get filled in
	   after the config is done */
	raidPtr->autoconfigure = 0;
	raidPtr->root_partition = 0;
	raidPtr->last_unit = raidPtr->raidid;
	raidPtr->config_order = 0;

	/* Only ever switched on here; an existing setting is not cleared. */
	if (rf_keepAccTotals) {
		raidPtr->keep_acc_totals = 1;
	}

	/* Allocate a bunch of buffers to be used in low-memory conditions */
	raidPtr->iobuf = NULL;

	rc = rf_AllocEmergBuffers(raidPtr);
	if (rc) {
		printf("raid%d: Unable to allocate emergency buffers.\n",
		       raidPtr->raidid);
		DO_RAID_FAIL();
		return(rc);
	}

	/* Set up parity map stuff, if applicable. */
#ifndef RF_NO_PARITY_MAP
	rf_paritymap_attach(raidPtr, cfgPtr->force);
#endif

	/* The array is marked valid before the summary is printed. */
	raidPtr->valid = 1;

	printf("raid%d: %s\n", raidPtr->raidid,
	       raidPtr->Layout.map->configName);
	printf("raid%d: Components:", raidPtr->raidid);

	/* List every component, tagging the ones whose disk is dead. */
	for (col = 0; col < raidPtr->numCol; col++) {
		printf(" %s", raidPtr->Disks[col].devname);
		if (RF_DEAD_DISK(raidPtr->Disks[col].status)) {
			printf("[**FAILED**]");
		}
	}
	printf("\n");
	/*
	 * Size in MB is computed as sectors / 1024 * bytes-per-sector / 1024;
	 * the first division happens before the multiply.
	 */
	printf("raid%d: Total Sectors: %" PRIu64 " (%" PRIu64 " MB)\n",
	       raidPtr->raidid,
	       raidPtr->totalSectors,
	       (raidPtr->totalSectors / 1024 *
				(1 << raidPtr->logBytesPerSector) / 1024));

	return (0);
}