/*
 * Shut down an array.
 *
 * Returns EINVAL if the array is not marked valid, EBUSY if waitShutdown
 * is already set (a shutdown is already under way), and 0 once the array
 * has been torn down.
 */
int
rf_Shutdown(RF_Raid_t *raidPtr)
{
	int shutdown_pending;

	if (!raidPtr->valid) {
		RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n");
		return (EINVAL);
	}

	/*
	 * Drain the outstanding I/Os.
	 * As described in rf_raid.h, the rad_freelist lock also protects
	 * the per-array bookkeeping for outstanding descriptors: freelist
	 * locking is needed anyway, so sharing the lock reduces the amount
	 * of serialization.
	 */
	RF_LOCK_MUTEX(rf_rad_lock);
	shutdown_pending = (raidPtr->waitShutdown != 0);
	if (!shutdown_pending) {
		/* Claim the shutdown, then wait until no access is left. */
		raidPtr->waitShutdown = 1;
		while (raidPtr->nAccOutstanding) {
			RF_WAIT_COND(raidPtr->outstandingCond, rf_rad_lock);
		}
	}
	RF_UNLOCK_MUTEX(rf_rad_lock);

	if (shutdown_pending)
		return (EBUSY);

	/* Sleep until no parity re-write is in progress. */
	while (raidPtr->parity_rewrite_in_progress) {
		printf("raid%d: Waiting for parity re-write to exit...\n",
		    raidPtr->raidid);
		tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
		    "rfprwshutdown", 0);
	}

	/* Sleep until no reconstruction is in progress. */
	while (raidPtr->reconInProgress) {
		printf("raid%d: Waiting for reconstruction to stop...\n",
		    raidPtr->raidid);
		tsleep(&raidPtr->waitForReconCond, PRIBIO,
		    "rfreshutdown",0);
	}

	/* From here on the array is no longer considered configured. */
	raidPtr->valid = 0;

	if (raidPtr->parity_map != NULL)
		rf_paritymap_detach(raidPtr);

	/* Final label update, then release components and resources. */
	rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
	rf_UnconfigureVnodes(raidPtr);
	rf_FreeEmergBuffers(raidPtr);
	rf_ShutdownList(&raidPtr->shutdownList);
	rf_UnconfigureArray();

	return (0);
}
/*
 * Allocate the per-row tables describing the components of an array:
 * raidPtr->Disks (RF_RaidDisk_t) and raidPtr->raid_cinfo (struct raidcinfo).
 * Every allocation is registered on raidPtr->cleanupList.
 *
 * Row zero of BOTH tables gets RF_MAXSPARE extra columns so that there is
 * room to hot-swap spares: the two tables are indexed with the same
 * (row, column) pair, so they must have the same shape.
 *
 * cfgPtr is not used here.
 *
 * Returns 0 on success.  On allocation failure rf_UnconfigureVnodes() is
 * called and ENOMEM is returned.
 */
int
rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
{
	RF_RaidDisk_t **disks;
	int ret;
	int r;
	int ncols;

	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
	    (RF_RaidDisk_t **), raidPtr->cleanupList);
	if (disks == NULL) {
		ret = ENOMEM;
		goto fail;
	}
	raidPtr->Disks = disks;

	/* Get space for the device-specific stuff... */
	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
	    sizeof(struct raidcinfo *), (struct raidcinfo **),
	    raidPtr->cleanupList);
	if (raidPtr->raid_cinfo == NULL) {
		ret = ENOMEM;
		goto fail;
	}

	for (r = 0; r < raidPtr->numRow; r++) {
		/*
		 * We allocate RF_MAXSPARE on the first row so that we
		 * have room to do hot-swapping of spares.  The same width
		 * is used for the disk descriptors and for the
		 * device-specific info, since a spare occupies the same
		 * column in both.
		 */
		ncols = raidPtr->numCol + ((r == 0) ? RF_MAXSPARE : 0);

		RF_CallocAndAdd(disks[r], ncols, sizeof(RF_RaidDisk_t),
		    (RF_RaidDisk_t *), raidPtr->cleanupList);
		if (disks[r] == NULL) {
			ret = ENOMEM;
			goto fail;
		}

		/* Get more space for device specific stuff... */
		RF_CallocAndAdd(raidPtr->raid_cinfo[r], ncols,
		    sizeof(struct raidcinfo), (struct raidcinfo *),
		    raidPtr->cleanupList);
		if (raidPtr->raid_cinfo[r] == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	return(0);

fail:
	/*
	 * NOTE(review): this runs with the tables possibly only partly
	 * allocated -- confirm rf_UnconfigureVnodes() tolerates that.
	 */
	rf_UnconfigureVnodes(raidPtr);
	return(ret);
}
/*
 * Configure the components of an array during auto-configuration, using
 * the list of candidate components in auto_config.
 *
 * For every (row, column) slot the list is searched for a component whose
 * label names that slot and carries the highest mod_counter seen in the
 * list.  If none is found, a component naming the slot with a different
 * mod_counter is accepted but marked rf_ds_failed.  Slots with no component
 * at all are marked rf_ds_failed and given a placeholder name.  Components
 * left unused are closed and their vnode released.
 *
 * Returns 0 on success.  On failure rf_UnconfigureVnodes() is called and an
 * errno value is returned: the error from rf_AllocDiskStructures(), or
 * EINVAL if a list entry has no component label or if no component matched
 * any slot of the array.
 */
int
rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    RF_AutoConfig_t *auto_config)
{
	RF_RaidDisk_t **disks;
	RF_RaidDisk_t *diskPtr;
	RF_RowCol_t r, c;
	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
	int bs = 0;
	int ret;
	int numFailuresThisRow;
	RF_AutoConfig_t *ac;
	int parity_good;
	int mod_counter = 0;
	int mod_counter_found;
	int foundone = 0;

#if DEBUG
	printf("Starting autoconfiguration of RAID set...\n");
#endif /* DEBUG */

	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
	if (ret)
		goto fail;

	disks = raidPtr->Disks;

	/* Assume the parity will be fine... */
	parity_good = RF_RAID_CLEAN;

	/*
	 * Find the highest mod_counter in the set; components with a lower
	 * one are out of date.  Every label is validated here, before it is
	 * first dereferenced, so the searches below can rely on
	 * ac->clabel being non-NULL.
	 */
	mod_counter_found = 0;
	for (ac = auto_config; ac != NULL; ac = ac->next) {
		if (ac->clabel == NULL) {
			/* Big-time bad news. */
			ret = EINVAL;
			goto fail;
		}
		if (mod_counter_found == 0 ||
		    ac->clabel->mod_counter > mod_counter) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		}
		ac->flag = 0;	/* Clear the general purpose flag. */
	}

	for (r = 0; r < raidPtr->numRow; r++) {
		numFailuresThisRow = 0;
		for (c = 0; c < raidPtr->numCol; c++) {
			diskPtr = &disks[r][c];

			/* Find this row/col in the autoconfig. */
#if DEBUG
			printf("Looking for %d,%d in autoconfig.\n", r, c);
#endif /* DEBUG */
			for (ac = auto_config; ac != NULL; ac = ac->next) {
				if ((ac->clabel->row == r) &&
				    (ac->clabel->column == c) &&
				    (ac->clabel->mod_counter == mod_counter)) {
					/*
					 * It's this one...
					 * Flag it as 'used', so we don't
					 * free it later.
					 */
					ac->flag = 1;
#if DEBUG
					printf("Found: %s at %d,%d.\n",
					    ac->devname, r, c);
#endif /* DEBUG */
					break;
				}
			}

			if (ac == NULL) {
				/*
				 * We didn't find an exact match with a
				 * correct mod_counter above...  Can we
				 * find one with an incorrect mod_counter
				 * to use instead ?  (This one, if we find
				 * it, will be marked as failed once the
				 * set configures)
				 */
				for (ac = auto_config; ac != NULL;
				    ac = ac->next) {
					if ((ac->clabel->row == r) &&
					    (ac->clabel->column == c)) {
						/*
						 * It's this one...
						 * Flag it as 'used', so we
						 * don't free it later.
						 */
						ac->flag = 1;
#if DEBUG
						printf("Found(low mod_counter)"
						    ": %s at %d,%d.\n",
						    ac->devname, r, c);
#endif /* DEBUG */
						break;
					}
				}
			}

			if (ac != NULL) {
				/* Found it.  Configure it... */
				foundone = 1;
				diskPtr->blockSize = ac->clabel->blockSize;
				diskPtr->numBlocks = ac->clabel->numBlocks;
				/*
				 * Note: rf_protectedSectors is already
				 * factored into numBlocks here.
				 */
				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;

				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
				    ac->clabel, sizeof(*ac->clabel));
				snprintf(diskPtr->devname,
				    sizeof diskPtr->devname, "/dev/%s",
				    ac->devname);

				/*
				 * Note the fact that this component was
				 * autoconfigured.  You'll need this info
				 * later.  Trust me :)
				 */
				diskPtr->auto_configured = 1;
				diskPtr->dev = ac->dev;

				/*
				 * We allow the user to specify that
				 * only a fraction of the disks should
				 * be used.  This is just for debug: it
				 * speeds up the parity scan.
				 */
				diskPtr->numBlocks = diskPtr->numBlocks *
				    rf_sizePercentage / 100;

				/*
				 * XXX These will get set multiple times,
				 * but since we're autoconfiguring, they'd
				 * better be always the same each time !
				 * If not, this is the least of your worries.
				 */
				bs = diskPtr->blockSize;
				min_numblks = diskPtr->numBlocks;

				/*
				 * This gets done multiple times, but that's
				 * fine -- the serial number will be the same
				 * for all components, guaranteed.
				 */
				raidPtr->serial_number =
				    ac->clabel->serial_number;

				/*
				 * Check the last time the label
				 * was modified.
				 */
				if (ac->clabel->mod_counter != mod_counter) {
					/*
					 * Even though we've filled in all
					 * of the above, we don't trust
					 * this component since its
					 * modification counter is not
					 * in sync with the rest, and we
					 * really consider it to be failed.
					 */
					disks[r][c].status = rf_ds_failed;
					numFailuresThisRow++;
				} else {
					if (ac->clabel->clean !=
					    RF_RAID_CLEAN) {
						parity_good = RF_RAID_DIRTY;
					}
				}
			} else {
				/*
				 * Didn't find it at all !!!
				 * Component must really be dead.
				 */
				disks[r][c].status = rf_ds_failed;
				snprintf(disks[r][c].devname,
				    sizeof disks[r][c].devname,
				    "component%d", r * raidPtr->numCol + c);
				numFailuresThisRow++;
			}
		}
		/* XXX Fix for n-fault tolerant. */
		/*
		 * XXX This should probably check to see how many failures
		 * we can handle for this configuration !
		 */
		if (numFailuresThisRow > 0)
			raidPtr->status[r] = rf_rs_degraded;
	}

	/* Close the device for the ones that didn't get used. */
	for (ac = auto_config; ac != NULL; ac = ac->next) {
		if (ac->flag == 0) {
			VOP_CLOSE(ac->vp, FREAD, NOCRED, curproc);
			vput(ac->vp);
			ac->vp = NULL;
#if DEBUG
			printf("Released %s from auto-config set.\n",
			    ac->devname);
#endif /* DEBUG */
		}
	}

	/*
	 * Without a single matching component there is no block size or
	 * mod_counter to record; refuse to configure rather than publish
	 * meaningless geometry.
	 */
	if (!foundone) {
		RF_ERRORMSG("RAIDFRAME: Did not find any components for"
		    " the array.\n");
		ret = EINVAL;
		goto fail;
	}

	raidPtr->mod_counter = mod_counter;

	/* Note the state of the parity, if any. */
	raidPtr->parity_good = parity_good;
	raidPtr->sectorsPerDisk = min_numblks;
	raidPtr->logBytesPerSector = ffs(bs) - 1;
	raidPtr->bytesPerSector = bs;
	raidPtr->sectorMask = bs - 1;
	return (0);

fail:
	rf_UnconfigureVnodes(raidPtr);
	return (ret);
}
/****************************************************************************
 * Configure the spare disks of the array.
 *
 * The spare descriptors live in row zero of raidPtr->Disks, right after the
 * numCol regular columns; that row is specially expanded to hold them and
 * is expected to have been allocated already by ConfigureDisks().
 *
 * listp is not used here.
 *
 * Returns 0 on success.  If configuring a spare fails, that error is
 * returned; if a spare has the wrong block size or is too small, EINVAL is
 * returned.  In both cases rf_UnconfigureVnodes() is called first.
 ****************************************************************************/
int
rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
    RF_Config_t * cfgPtr)
{
	RF_RaidDisk_t *spares;
	RF_RaidDisk_t *sp;
	unsigned int sector_bytes;
	int idx, ret;

	/* First spare slot: row 0, just past the regular columns. */
	spares = &raidPtr->Disks[0][raidPtr->numCol];

	/* Pass 1: open and probe every spare. */
	for (idx = 0; idx < raidPtr->numSpare; idx++) {
		sp = &spares[idx];

		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[idx][0],
		    sp, 0, raidPtr->numCol + idx);
		if (ret)
			goto fail;

		if (sp->status == rf_ds_optimal) {
			/* Change status to spare. */
			sp->status = rf_ds_spare;
			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld"
			    " blockSize %d (%ld MB).\n", idx, sp->devname,
			    (long int) sp->numBlocks, sp->blockSize,
			    (long int) sp->numBlocks *
			    sp->blockSize / 1024 / 1024);
		} else {
			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
			    &cfgPtr->spare_names[idx][0]);
		}
	}

	/* Pass 2: check sizes and block sizes on spare disks. */
	sector_bytes = 1 << raidPtr->logBytesPerSector;
	for (idx = 0; idx < raidPtr->numSpare; idx++) {
		sp = &spares[idx];

		if (sp->blockSize != sector_bytes) {
			RF_ERRORMSG3("Block size of %d on spare disk %s is"
			    " not the same as on other disks (%d).\n",
			    sp->blockSize, sp->devname, sector_bytes);
			ret = EINVAL;
			goto fail;
		}

		if (sp->numBlocks < raidPtr->sectorsPerDisk) {
			RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small"
			    " to serve as a spare (need %llu blocks).\n",
			    sp->devname, sp->numBlocks,
			    raidPtr->sectorsPerDisk);
			ret = EINVAL;
			goto fail;
		}

		if (sp->numBlocks > raidPtr->sectorsPerDisk) {
			/* Larger spares are clipped to the array's size. */
			RF_ERRORMSG2("Warning: truncating spare disk"
			    " %s to %llu blocks.\n", sp->devname,
			    raidPtr->sectorsPerDisk);
			sp->numBlocks = raidPtr->sectorsPerDisk;
		}
	}

	return (0);

fail:
	/*
	 * Release the hold on the main components.  We've failed to allocate
	 * a spare, and since we're failing, we need to free things...
	 *
	 * XXX Failing to allocate a spare is *not* that big of a deal...
	 * We *can* survive without it, if need be, esp. if we get hot
	 * adding working.
	 * If we don't fail out here, then we need a way to remove this
	 * spare...  That should be easier to do here than if we are "live"...
	 */
	rf_UnconfigureVnodes(raidPtr);
	return (ret);
}
/****************************************************************************
 *
 * Initialize the disks comprising the array.
 *
 * We want the spare disks to have regular row,col numbers so that we can
 * easily substitute a spare for a failed disk.  But, the driver code assumes
 * throughout that the array contains numRow by numCol _non-spare_ disks, so
 * it's not clear how to fit in the spares.  This is an unfortunate holdover
 * from raidSim.  The quick and dirty fix is to make row zero bigger than the
 * rest, and put all the spares in it.  This probably needs to get changed
 * eventually.
 *
 * listp is not used here.
 *
 * Returns 0 on success.  On failure rf_UnconfigureVnodes() is called and an
 * errno value is returned: the error from rf_AllocDiskStructures() or
 * rf_ConfigureDisk(), or EINVAL when no live disk is found, the block size
 * is not a power of 2, block sizes differ between disks, or
 * rf_CheckLabels() reports errors and cfgPtr->force is not set.
 *
 ****************************************************************************/
int
rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    RF_Config_t *cfgPtr)
{
	RF_RaidDisk_t **disks;
	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
	RF_RowCol_t r, c;
	int bs, ret;
	int foundone;
	unsigned numFailuresThisRow;
	int force;

	force = cfgPtr->force;

	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
	if (ret)
		goto fail;

	disks = raidPtr->Disks;

	for (r = 0; r < raidPtr->numRow; r++) {
		numFailuresThisRow = 0;
		for (c = 0; c < raidPtr->numCol; c++) {
			ret = rf_ConfigureDisk(raidPtr,
			    &cfgPtr->devnames[r][c][0], &disks[r][c], r, c);
			if (ret)
				goto fail;

			if (disks[r][c].status != rf_ds_optimal) {
				numFailuresThisRow++;
				continue;
			}

			/* Live component: read its label, track min size. */
			raidread_component_label(
			    raidPtr->raid_cinfo[r][c].ci_dev,
			    raidPtr->raid_cinfo[r][c].ci_vp,
			    &raidPtr->raid_cinfo[r][c].ci_label);

			if (disks[r][c].numBlocks < min_numblks)
				min_numblks = disks[r][c].numBlocks;
			DPRINTF7("Disk at row %d col %d: dev %s"
			    " numBlocks %ld blockSize %d (%ld MB)\n",
			    r, c, disks[r][c].devname,
			    (long int) disks[r][c].numBlocks,
			    disks[r][c].blockSize,
			    (long int) disks[r][c].numBlocks *
			    disks[r][c].blockSize / 1024 / 1024);
		}
		/* XXX Fix for n-fault tolerant. */
		/*
		 * XXX This should probably check to see how many failures
		 * we can handle for this configuration !
		 */
		if (numFailuresThisRow > 0)
			raidPtr->status[r] = rf_rs_degraded;
	}

	/*
	 * All disks must be the same size & have the same block size, bs must
	 * be a power of 2.  The reference block size is taken from the first
	 * live disk in row-major order.
	 */
	bs = 0;
	foundone = 0;
	for (r = 0; !foundone && r < raidPtr->numRow; r++) {
		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
			if (disks[r][c].status == rf_ds_optimal) {
				bs = disks[r][c].blockSize;
				foundone = 1;
			}
		}
	}
	if (!foundone) {
		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in"
		    " the array.\n");
		ret = EINVAL;
		goto fail;
	}

	/* A positive value is a power of 2 iff exactly one bit is set. */
	if (bs <= 0 || (bs & (bs - 1)) != 0) {
		RF_ERRORMSG1("Error: block size on disks (%d) must be a"
		    " power of 2.\n", bs);
		ret = EINVAL;
		goto fail;
	}

	if (rf_CheckLabels(raidPtr, cfgPtr)) {
		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
		if (force != 0) {
			printf("raid%d: Fatal errors being ignored.\n",
			    raidPtr->raidid);
		} else {
			ret = EINVAL;
			goto fail;
		}
	}

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (disks[r][c].status != rf_ds_optimal)
				continue;

			if (disks[r][c].blockSize != bs) {
				RF_ERRORMSG2("Error: block size of"
				    " disk at r %d c %d different from"
				    " disk at r 0 c 0.\n", r, c);
				ret = EINVAL;
				goto fail;
			}
			if (disks[r][c].numBlocks != min_numblks) {
				/*
				 * The sector count is 64 bits wide; print
				 * it in full rather than truncated to int.
				 */
				RF_ERRORMSG3("WARNING: truncating disk"
				    " at r %d c %d to %llu blocks.\n",
				    r, c, (unsigned long long) min_numblks);
				disks[r][c].numBlocks = min_numblks;
			}
		}
	}

	raidPtr->sectorsPerDisk = min_numblks;
	raidPtr->logBytesPerSector = ffs(bs) - 1;
	raidPtr->bytesPerSector = bs;
	raidPtr->sectorMask = bs - 1;
	return (0);

fail:
	rf_UnconfigureVnodes(raidPtr);
	return (ret);
}