Esempio n. 1
0
/* Invoked when the copyback has completed. */
void
rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
{
	RF_Raid_t *raidPtr = desc->raidPtr;
	struct timeval t, diff;

	if (!status) {
		RF_LOCK_MUTEX(raidPtr->mutex);
		if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
			RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
			rf_FreeSpareTable(raidPtr);
		} else {
			raidPtr->Disks[desc->spRow][desc->spCol].status =
			    rf_ds_spare;
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		RF_GETTIME(t);
		RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
		printf("Copyback time was %d.%06d seconds.\n",
		    (int) diff.tv_sec, (int) diff.tv_usec);
	} else
		printf("COPYBACK: Failure.\n");

	RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
	rf_FreeMCPair(desc->mcpair);
	RF_Free(desc, sizeof(*desc));

	rf_copyback_in_progress = 0;
	rf_ResumeNewRequests(raidPtr);
}
Esempio n. 2
0
void
rf_PrintReconSchedule(RF_ReconMap_t *mapPtr, struct timeval *starttime)
{
	static int old_pctg = -1;
	struct timeval tv, diff;
	int new_pctg;

	new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 /
	    mapPtr->totalRUs);
	if (new_pctg != old_pctg) {
		RF_GETTIME(tv);
		RF_TIMEVAL_DIFF(starttime, &tv, &diff);
		printf("%d %d.%06d\n", (int) new_pctg, (int) diff.tv_sec,
		    (int) diff.tv_usec);
		old_pctg = new_pctg;
	}
}
Esempio n. 3
0
/* Do a complete copyback. */
void
rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t c_label;
	int done, retcode;
	RF_CopybackDesc_t *desc;
	RF_RowCol_t frow, fcol;
	RF_RaidDisk_t *badDisk;
	char *databuf;

	struct partinfo dpart;
	struct vnode *vp;
	struct vattr va;
	struct proc *proc;

	int ac;

	done = 0;
	fcol = 0;
	for (frow = 0; frow < raidPtr->numRow; frow++) {
		for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
			if (raidPtr->Disks[frow][fcol].status ==
			     rf_ds_dist_spared ||
			    raidPtr->Disks[frow][fcol].status ==
			     rf_ds_spared) {
				done = 1;
				break;
			}
		}
		if (done)
			break;
	}

	if (frow == raidPtr->numRow) {
		printf("COPYBACK: No disks need copyback.\n");
		return;
	}
	badDisk = &raidPtr->Disks[frow][fcol];

	proc = raidPtr->engine_thread;

	/*
	 * This device may have been opened successfully the first time.
	 * Close it before trying to open it again.
	 */

	if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
		printf("Close the opened device: %s.\n",
		    raidPtr->Disks[frow][fcol].devname);
 		vp = raidPtr->raid_cinfo[frow][fcol].ci_vp;
 		ac = raidPtr->Disks[frow][fcol].auto_configured;
 		rf_close_component(raidPtr, vp, ac);
		raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;

	}
 	/* Note that this disk was *not* auto_configured (any longer). */
 	raidPtr->Disks[frow][fcol].auto_configured = 0;

	printf("About to (re-)open the device: %s.\n",
	    raidPtr->Disks[frow][fcol].devname);

	retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp);

	if (retcode) {
		printf("COPYBACK: raidlookup on device: %s failed: %d !\n",
		    raidPtr->Disks[frow][fcol].devname, retcode);

		/*
		 * XXX The component isn't responding properly... Must be
		 * still dead :-(
		 */
		return;

	} else {

		/*
		 * Ok, so we can at least do a lookup...
		 * How about actually getting a vp for it ?
		 */

		if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0)
		{
			return;
		}
		retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) &dpart, FREAD,
		    proc->p_ucred, proc);
		if (retcode) {
			return;
		}
		raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize;

		raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size -
		    rf_protectedSectors;

		raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
		raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;

		/* XXX Or the above ? */
		raidPtr->Disks[frow][fcol].dev = va.va_rdev;

		/*
		 * We allow the user to specify that only a fraction of the
		 * disks should be used this is just for debug: it speeds up
		 * the parity scan.
		 */
		raidPtr->Disks[frow][fcol].numBlocks =
		    raidPtr->Disks[frow][fcol].numBlocks *
		    rf_sizePercentage / 100;
	}
#if 0
	/* This is the way it was done before the CAM stuff was removed. */

	if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
		printf("COPYBACK: unable to extract bus, target, lun from"
		    " devname %s.\n", badDisk->devname);
		return;
	}
	/*
	 * TUR the disk that's marked as bad to be sure that it's actually
	 * alive.
	 */
	rf_SCSI_AllocTUR(&tur_op);
	retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
	rf_SCSI_FreeDiskOp(tur_op, 0);
#endif

	if (retcode) {
		printf("COPYBACK: target disk failed TUR.\n");
		return;
	}
	/* Get a buffer to hold one SU. */
	RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr,
	    raidPtr->Layout.sectorsPerStripeUnit), (char *));

	/* Create a descriptor. */
	RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
	desc->raidPtr = raidPtr;
	desc->status = 0;
	desc->frow = frow;
	desc->fcol = fcol;
	desc->spRow = badDisk->spareRow;
	desc->spCol = badDisk->spareCol;
	desc->stripeAddr = 0;
	desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit *
	    raidPtr->Layout.numDataCol;
	desc->databuf = databuf;
	desc->mcpair = rf_AllocMCPair();

	printf("COPYBACK: Quiescing the array.\n");
	/*
	 * Quiesce the array, since we don't want to code support for user
	 * accs here.
	 */
	rf_SuspendNewRequestsAndWait(raidPtr);

	/* Adjust state of the array and of the disks. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
	raidPtr->status[desc->frow] = rf_rs_optimal;
	rf_copyback_in_progress = 1;	/* Debug only. */
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	printf("COPYBACK: Beginning\n");
	RF_GETTIME(desc->starttime);
	rf_ContinueCopyback(desc);

	/*
	 * Data has been restored.
	 * Fix up the component label.
	 * Don't actually need the read here.
	 */
	raidread_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
				 raidPtr->raid_cinfo[frow][fcol].ci_vp,
				 &c_label);

	raid_init_component_label(raidPtr, &c_label);

	c_label.row = frow;
	c_label.column = fcol;

	raidwrite_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
				  raidPtr->raid_cinfo[frow][fcol].ci_vp,
				  &c_label);
}
Esempio n. 4
0
/*
 * Invoked via callback after a copyback I/O has completed to
 * continue on with the next one.
 */
void
rf_ContinueCopyback(RF_CopybackDesc_t *desc)
{
	RF_SectorNum_t testOffs, stripeAddr;
	RF_Raid_t *raidPtr = desc->raidPtr;
	RF_RaidAddr_t addr;
	RF_RowCol_t testRow, testCol;
	int old_pctg, new_pctg, done;
	struct timeval t, diff;

	old_pctg = (-1);
	while (1) {
		stripeAddr = desc->stripeAddr;
		desc->raidPtr->copyback_stripes_done = stripeAddr /
		    desc->sectPerStripe;
		if (rf_prReconSched) {
			old_pctg = 100 * desc->stripeAddr /
			    raidPtr->totalSectors;
		}
		desc->stripeAddr += desc->sectPerStripe;
		if (rf_prReconSched) {
			new_pctg = 100 * desc->stripeAddr /
			    raidPtr->totalSectors;
			if (new_pctg != old_pctg) {
				RF_GETTIME(t);
				RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
				printf("%d %d.%06d\n", new_pctg,
				    (int) diff.tv_sec, (int) diff.tv_usec);
			}
		}
		if (stripeAddr >= raidPtr->totalSectors) {
			rf_CopybackComplete(desc, 0);
			return;
		}
		/* Walk through the current stripe, su-by-su. */
		for (done = 0, addr = stripeAddr;
		     addr < stripeAddr + desc->sectPerStripe;
		     addr += desc->sectPerSU) {

			/* Map the SU, disallowing remap to spare space. */
			(raidPtr->Layout.map->MapSector) (raidPtr, addr,
			    &testRow, &testCol, &testOffs, RF_DONT_REMAP);

			if (testRow == desc->frow && testCol == desc->fcol) {
				rf_CopybackOne(desc, RF_COPYBACK_DATA, addr,
				    testRow, testCol, testOffs);
				done = 1;
				break;
			}
		}

		if (!done) {
			/*
			 * We didn't find the failed disk in the data part,
			 * check parity.
			 */

			/*
			 * Map the parity for this stripe, disallowing remap
			 * to spare space.
			 */
			(raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr,
			    &testRow, &testCol, &testOffs, RF_DONT_REMAP);

			if (testRow == desc->frow && testCol == desc->fcol) {
				rf_CopybackOne(desc, RF_COPYBACK_PARITY,
				    stripeAddr, testRow, testCol, testOffs);
			}
		}
		/* Check to see if the last read/write pair failed. */
		if (desc->status) {
			rf_CopybackComplete(desc, 1);
			return;
		}
		/*
		 * We didn't find any units to copy back in this stripe.
		 * Continue with the next one.
		 */
	}
}