/*
 * Queue an in-core parity log for reintegration.
 *
 * Flags region `regionID` as having a reintegration in progress, pushes
 * `log` onto the front of parityLogDiskQueue.reintQueue and signals the
 * disk queue's condition variable.  NON-BLOCKING.
 *
 * raidPtr  - array owning the region table and the parity log disk queue
 * regionID - index into raidPtr->regionInfo[] whose flag is set
 * log      - log to enqueue; must be non-NULL
 */
void
rf_ReintLog(RF_Raid_t *raidPtr, int regionID, RF_ParityLog_t *log)
{
	RF_ASSERT(log);

	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
	/* Cleared when reint complete. */
	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;

	if (rf_parityLogDebug) {
		printf("[requesting reintegration of region %d]\n",
		    log->regionID);
	}

	/* Push the record onto the head of the reintegration queue. */
	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	log->next = raidPtr->parityLogDiskQueue.reintQueue;
	raidPtr->parityLogDiskQueue.reintQueue = log;

	/* The region lock is dropped before the queue lock. */
	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
}
/*
 * Deletes an entry from the parity stripe status (PSS) table after
 * reconstruction has completed.
 *
 * The entry matching (psid, which_ru) is unlinked from its hash chain
 * while the bucket's `lock` flag is held.  Every callback queued on the
 * entry's procWaitList is then invoked and freed, and the entry itself is
 * released with rf_FreePSStatus().
 *
 * raidPtr  - array whose reconControl->pssTable is searched
 * psid     - parity stripe ID of the entry to remove
 * which_ru - reconstruction unit of the entry to remove
 *
 * The entry must exist; if it does not, the table is printed and the
 * assertion below fires.
 */
void
rf_RemoveFromActiveReconTable(RF_Raid_t *raidPtr, RF_StripeNum_t psid,
    RF_ReconUnitNum_t which_ru)
{
	RF_PSStatusHeader_t *hdr =
	    &(raidPtr->reconControl->pssTable[RF_HASH_PSID(raidPtr, psid)]);
	RF_ReconParityStripeStatus_t *p, *pt;
	RF_CallbackDesc_t *cb, *cb1;

	/* Acquire the bucket's sleep lock, waiting while someone holds it. */
	RF_LOCK_MUTEX(hdr->mutex);
	while (hdr->lock) {
		ltsleep(&hdr->lock, PRIBIO, "rf_racrecon", 0, &hdr->mutex);
	}
	hdr->lock = 1;
	RF_UNLOCK_MUTEX(hdr->mutex);

	/* Find the entry, remembering its predecessor for the unlink. */
	for (pt = NULL, p = hdr->chain; p; pt = p, p = p->next) {
		if ((p->parityStripeID == psid) && (p->which_ru == which_ru))
			break;
	}
	if (p == NULL) {
		rf_PrintPSStatusTable(raidPtr);
	}
	RF_ASSERT(p);	/* it must be there */

	Dprintf2("PSS: deleting pss for psid %ld ru %d\n", psid, which_ru);

	/* delete this entry from the hash chain */
	if (pt)
		pt->next = p->next;
	else
		hdr->chain = p->next;
	p->next = NULL;

	/*
	 * Release the bucket lock and wake any thread sleeping on it in an
	 * ltsleep() loop like the one above.  Those sleeps have no timeout,
	 * so without this wakeup a waiter that went to sleep while we held
	 * the lock would not be resumed by us.  A spurious wakeup is
	 * harmless because waiters re-test hdr->lock.
	 */
	RF_LOCK_MUTEX(hdr->mutex);
	hdr->lock = 0;
	wakeup(&hdr->lock);
	RF_UNLOCK_MUTEX(hdr->mutex);

	/* wakeup anyone waiting on the parity stripe ID */
	cb = p->procWaitList;
	p->procWaitList = NULL;
	while (cb) {
		Dprintf1("Waking up access waiting on parity stripe ID %ld\n",
		    p->parityStripeID);
		cb1 = cb->next;
		(cb->callbackFunc) (cb->callbackArg);
		rf_FreeCallbackDesc(cb);
		cb = cb1;
	}

	rf_FreePSStatus(raidPtr, p);
}
/*
 * Return a struct for holding common parity log information.
 *
 * The struct is taken from the free list
 * (parityLogDiskQueue.freeCommonList) when that list is non-empty;
 * otherwise a new one is created with RF_Malloc and its mutex is
 * initialised.  NON-BLOCKING
 *
 * raidPtr - array whose parity log disk queue owns the free list
 *
 * Returns the struct with its `next` link cleared, or NULL if the mutex
 * of a freshly allocated struct could not be initialised (the struct is
 * freed again in that case).
 */
RF_CommonLogData_t *
rf_AllocParityLogCommonData(RF_Raid_t *raidPtr)
{
	RF_CommonLogData_t *common = NULL;
	int rc;

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	if (raidPtr->parityLogDiskQueue.freeCommonList) {
		/* Pop the head of the free list. */
		common = raidPtr->parityLogDiskQueue.freeCommonList;
		raidPtr->parityLogDiskQueue.freeCommonList =
		    raidPtr->parityLogDiskQueue.freeCommonList->next;
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	} else {
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
		RF_Malloc(common, sizeof(RF_CommonLogData_t),
		    (RF_CommonLogData_t *));
		rc = rf_mutex_init(&common->mutex);
		if (rc) {
			RF_ERRORMSG3("Unable to init mutex file %s line %d"
			    " rc=%d\n", __FILE__, __LINE__, rc);
			RF_Free(common, sizeof(RF_CommonLogData_t));
			/*
			 * Bail out here: falling through would write to
			 * `next` through a NULL pointer.
			 */
			return (NULL);
		}
	}

	common->next = NULL;
	return (common);
}
/*
 * Invoked when the copyback has completed.
 *
 * desc   - copyback descriptor; its data buffer, MC pair and the
 *          descriptor itself are freed here
 * status - zero on success, non-zero on failure
 *
 * On success the spare is released (the spare table is freed for
 * distributed sparing, otherwise the spare disk is marked rf_ds_spare)
 * and the elapsed time is printed.  In both cases the copyback flag is
 * cleared and new requests are resumed.
 */
void
rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
{
	RF_Raid_t *raidPtr = desc->raidPtr;
	struct timeval now, elapsed;

	if (status) {
		printf("COPYBACK: Failure.\n");
	} else {
		RF_LOCK_MUTEX(raidPtr->mutex);
		if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
			RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
			rf_FreeSpareTable(raidPtr);
		} else {
			raidPtr->Disks[desc->spRow][desc->spCol].status =
			    rf_ds_spare;
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		RF_GETTIME(now);
		RF_TIMEVAL_DIFF(&desc->starttime, &now, &elapsed);
		printf("Copyback time was %d.%06d seconds.\n",
		    (int) elapsed.tv_sec, (int) elapsed.tv_usec);
	}

	/* raidPtr was saved above, so it stays usable after desc is freed. */
	RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
	rf_FreeMCPair(desc->mcpair);
	RF_Free(desc, sizeof(*desc));

	rf_copyback_in_progress = 0;
	rf_ResumeNewRequests(raidPtr);
}
/*
 * Suspends all new requests to the array.  No effect on accesses that
 * are in flight.
 *
 * Increments accesses_suspended and, if any accesses are in flight,
 * waits (via WAIT_FOR_QUIESCENCE) until access_suspend_release is set.
 *
 * raidPtr - array to quiesce
 *
 * Returns the value of raidPtr->waiting_for_quiescence sampled while
 * access_suspend_mutex is still held.  As written, that value is 0 on
 * every path through this function: the flag starts at 0 when nothing
 * is in flight, and the wait loop clears it otherwise.
 */
int
rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
{
	int waiting;

#if RF_DEBUG_QUIESCE
	if (rf_quiesceDebug)
		printf("raid%d: Suspending new reqs\n", raidPtr->raidid);
#endif
	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
	raidPtr->accesses_suspended++;
	raidPtr->waiting_for_quiescence =
	    (raidPtr->accs_in_flight == 0) ? 0 : 1;

	if (raidPtr->waiting_for_quiescence) {
		raidPtr->access_suspend_release = 0;
		while (!raidPtr->access_suspend_release) {
#if RF_DEBUG_QUIESCE
			printf("raid%d: Suspending: Waiting for Quiescence\n",
			    raidPtr->raidid);
#endif
			WAIT_FOR_QUIESCENCE(raidPtr);
			raidPtr->waiting_for_quiescence = 0;
		}
	}
#if RF_DEBUG_QUIESCE
	printf("raid%d: Quiescence reached..\n", raidPtr->raidid);
#endif

	/*
	 * Take the return value before dropping the mutex so that it cannot
	 * be changed by a concurrent caller between the unlock and the read.
	 */
	waiting = raidPtr->waiting_for_quiescence;
	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
	return (waiting);
}
/*
 * Wake up everyone waiting for quiescence to be released.
 *
 * Decrements accesses_suspended.  When the count reaches zero, the
 * callbacks queued on quiesce_wait_list are detached, invoked in list
 * order outside the mutex, and freed.
 *
 * raidPtr - array whose suspension is being released
 */
void
rf_ResumeNewRequests(RF_Raid_t *raidPtr)
{
	RF_CallbackDesc_t *t, *cb;

#if RF_DEBUG_QUIESCE
	if (rf_quiesceDebug)
		printf("raid%d: Resuming new requests\n", raidPtr->raidid);
#endif

	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
	raidPtr->accesses_suspended--;
	if (raidPtr->accesses_suspended == 0) {
		cb = raidPtr->quiesce_wait_list;
		raidPtr->quiesce_wait_list = NULL;
	} else {
		/*
		 * Another suspension is still outstanding: leave the wait
		 * list in place so the final resume can run the callbacks.
		 * Clearing it here would drop them without ever invoking or
		 * freeing them.
		 */
		cb = NULL;
	}
	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);

	while (cb) {
		t = cb;
		cb = cb->next;
		(t->callbackFunc) (t->callbackArg);
		rf_FreeCallbackDesc(t);
	}
}
/*
 * Return a struct for holding parity log information.
 *
 * The struct is popped from parityLogDiskQueue.freeDataList when that
 * list is non-empty; otherwise RF_Malloc creates a new one.  Either way
 * the `next` and `prev` links are cleared before returning.
 * NON-BLOCKING
 *
 * raidPtr - array whose parity log disk queue owns the free list
 */
RF_ParityLogData_t *
rf_AllocParityLogData(RF_Raid_t *raidPtr)
{
	RF_ParityLogData_t *data = NULL;

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	if (raidPtr->parityLogDiskQueue.freeDataList == NULL) {
		/* Nothing cached: allocate outside the lock. */
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
		RF_Malloc(data, sizeof(RF_ParityLogData_t),
		    (RF_ParityLogData_t *));
	} else {
		data = raidPtr->parityLogDiskQueue.freeDataList;
		raidPtr->parityLogDiskQueue.freeDataList = data->next;
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	}

	data->prev = NULL;
	data->next = NULL;
	return (data);
}
/*
 * Called whenever an array is shutdown.
 *
 * Drops one reference from configureCount under configureMutex.  When
 * the last configuration goes away, the global shutdown list is run, the
 * thread-id module is shut down and, if rf_memDebug is set, unfreed
 * memory is reported.
 */
static void
rf_UnconfigureArray()
{
	int rc;

	RF_LOCK_MUTEX(configureMutex);

	configureCount--;
	if (configureCount == 0) {
		/* No active configurations remain: shut everything down. */
		isconfigged = 0;

		rc = rf_ShutdownList(&globalShutdown);
		if (rc) {
			RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc);
		}

		rf_shutdown_threadid();

		/*
		 * We must wait until now, because the AllocList module
		 * uses the DebugMem module.
		 */
		if (rf_memDebug)
			rf_print_unfreed();
	}

	RF_UNLOCK_MUTEX(configureMutex);
}
/*
 * Called to shut down an array.
 *
 * raidPtr - array to shut down
 *
 * Returns EINVAL if the array is not valid (configured), EBUSY if a
 * shutdown is already waiting on it, and 0 once the array has been torn
 * down.
 */
int
rf_Shutdown(RF_Raid_t *raidPtr)
{
	if (!raidPtr->valid) {
		RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n");
		return (EINVAL);
	}

	/*
	 * Wait for outstanding IOs to land.
	 * As described in rf_raid.h, we use the rad_freelist lock
	 * to protect the per-array info about outstanding descs
	 * since we need to do freelist locking anyway, and this
	 * cuts down on the amount of serialization we've got going
	 * on.
	 */
	RF_LOCK_MUTEX(rf_rad_lock);
	if (raidPtr->waitShutdown) {
		/* Somebody else is already shutting this array down. */
		RF_UNLOCK_MUTEX(rf_rad_lock);
		return (EBUSY);
	}
	raidPtr->waitShutdown = 1;
	while (raidPtr->nAccOutstanding) {
		RF_WAIT_COND(raidPtr->outstandingCond, rf_rad_lock);
	}
	RF_UNLOCK_MUTEX(rf_rad_lock);

	/* Wait for any parity re-writes to stop... */
	while (raidPtr->parity_rewrite_in_progress) {
		printf("raid%d: Waiting for parity re-write to exit...\n",
		    raidPtr->raidid);
		tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
		    "rfprwshutdown", 0);
	}

	/* Wait for any reconstruction to stop... */
	while (raidPtr->reconInProgress) {
		printf("raid%d: Waiting for reconstruction to stop...\n",
		    raidPtr->raidid);
		tsleep(&raidPtr->waitForReconCond, PRIBIO,
		    "rfreshutdown", 0);
	}

	raidPtr->valid = 0;

	if (raidPtr->parity_map)
		rf_paritymap_detach(raidPtr);

	/* Final label update, then release per-array resources. */
	rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
	rf_UnconfigureVnodes(raidPtr);
	rf_FreeEmergBuffers(raidPtr);
	rf_ShutdownList(&raidPtr->shutdownList);

	rf_UnconfigureArray();

	return (0);
}
/*
 * Mark component `fcol` as failed and close it.
 *
 * raidPtr   - array containing the component
 * fcol      - column of the component to fail
 * initRecon - non-zero to start reconstruction of the failed disk
 *
 * Always returns 0.
 */
int
rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon)
{
	/*
	 * Need to suspend IO's here -- if there are DAGs in flight
	 * and we pull the rug out from under ci_vp, Bad Things
	 * can happen.
	 */
	rf_SuspendNewRequestsAndWait(raidPtr);

	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->Disks[fcol].status != rf_ds_failed) {
		/*
		 * Must be failing something that is valid, or else it's
		 * already marked as failed (in which case we don't
		 * want to mark it failed again!).
		 */
		raidPtr->numFailures++;
		raidPtr->Disks[fcol].status = rf_ds_failed;
		raidPtr->status = rf_rs_degraded;
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);

	/*
	 * Close the component, so that it's not "locked" if someone
	 * else wants to use it!
	 */
	rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp,
	    raidPtr->Disks[fcol].auto_configured);

	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->raid_cinfo[fcol].ci_vp = NULL;
	/*
	 * Need to mark the component as not being auto_configured
	 * (in case it was previously).
	 */
	raidPtr->Disks[fcol].auto_configured = 0;
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Now we can allow IO to continue -- we'll be suspending it
	 * again in rf_ReconstructFailedDisk() if we have to..
	 */
	rf_ResumeNewRequests(raidPtr);

	if (initRecon)
		rf_ReconstructFailedDisk(raidPtr, fcol);

	return (0);
}
/*
 * Copyback one unit.
 *
 * Builds a read request against the spare location and a write request
 * against the target location, enqueues the read, and then waits on the
 * descriptor's MC pair until its flag is set.  Both requests are freed
 * before returning.
 *
 * desc     - copyback descriptor (buffer, MC pair, spare row/col)
 * typ      - RF_COPYBACK_DATA selects MapSector, anything else MapParity
 *            (only consulted for distributed sparing)
 * addr     - RAID address passed to the mapping function
 * testRow  - row recorded in desc->frow
 * testCol  - column recorded in desc->fcol
 * testOffs - sector offset used for the write, and for the read too
 *            when the layout does not use distributed sparing
 */
void
rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr,
    RF_RowCol_t testRow, RF_RowCol_t testCol, RF_SectorNum_t testOffs)
{
	RF_SectorCount_t suSectors = desc->sectPerSU;
	RF_Raid_t *raidPtr = desc->raidPtr;
	RF_RowCol_t spRow = desc->spRow;
	RF_RowCol_t spCol = desc->spCol;
	RF_SectorNum_t spOffs;

	/* Find the spare location for this SU. */
	if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
		spOffs = testOffs;
	} else if (typ == RF_COPYBACK_DATA) {
		raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol,
		    &spOffs, RF_REMAP);
	} else {
		raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol,
		    &spOffs, RF_REMAP);
	}

	/* Create reqs to read the old location & write the new. */
	desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
	    suSectors, desc->databuf, 0L, 0,
	    (int (*) (void *, int)) rf_CopybackReadDoneProc, desc,
	    NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
	desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
	    suSectors, desc->databuf, 0L, 0,
	    (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc,
	    NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);

	desc->frow = testRow;
	desc->fcol = testCol;

	/*
	 * Enqueue the read.  The write will go out as part of the callback on
	 * the read.  At user-level & in the kernel, wait for the read-write
	 * pair to complete.  In the simulator, just return, since everything
	 * will happen as callbacks.
	 */
	RF_LOCK_MUTEX(desc->mcpair->mutex);
	desc->mcpair->flag = 0;

	rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq,
	    RF_IO_NORMAL_PRIORITY);

	while (!desc->mcpair->flag) {
		RF_WAIT_MCPAIR(desc->mcpair);
	}
	RF_UNLOCK_MUTEX(desc->mcpair->mutex);

	rf_FreeDiskQueueData(desc->readreq);
	rf_FreeDiskQueueData(desc->writereq);
}
/*
 * Turn parity logging on for every region.
 *
 * Sets loggingEnabled to RF_TRUE in each of the rf_numParityRegions
 * entries of raidPtr->regionInfo[], taking that region's mutex around
 * the store.
 */
void
rf_EnableParityLogging(RF_Raid_t *raidPtr)
{
	int rid;

	rid = 0;
	while (rid < rf_numParityRegions) {
		RF_LOCK_MUTEX(raidPtr->regionInfo[rid].mutex);
		raidPtr->regionInfo[rid].loggingEnabled = RF_TRUE;
		RF_UNLOCK_MUTEX(raidPtr->regionInfo[rid].mutex);
		rid++;
	}

	if (rf_parityLogDebug) {
		printf("[parity logging enabled]\n");
	}
}
/*
 * Allocate and initialise a RAID access descriptor.
 *
 * The descriptor comes from the rf_pools.rad pool.  If the array is
 * waiting to shut down, the descriptor is returned to the pool and NULL
 * is returned; otherwise the array's outstanding-access count is bumped
 * and the descriptor is filled in from the arguments with all remaining
 * bookkeeping fields reset.
 */
RF_RaidAccessDesc_t *
rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
    RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, void *bufPtr,
    void *bp, RF_RaidAccessFlags_t flags, const RF_AccessState_t *states)
{
	RF_RaidAccessDesc_t *desc;

	desc = pool_get(&rf_pools.rad, PR_WAITOK);

	RF_LOCK_MUTEX(rf_rad_lock);
	if (raidPtr->waitShutdown) {
		/*
		 * Actually, we're shutting the array down.  Free the desc
		 * and return NULL.
		 */
		RF_UNLOCK_MUTEX(rf_rad_lock);
		pool_put(&rf_pools.rad, desc);
		return (NULL);
	}
	raidPtr->nAccOutstanding++;
	RF_UNLOCK_MUTEX(rf_rad_lock);

	/* Caller-supplied description of the access. */
	desc->raidPtr = (void *) raidPtr;
	desc->type = type;
	desc->raidAddress = raidAddress;
	desc->numBlocks = numBlocks;
	desc->bufPtr = bufPtr;
	desc->bp = bp;
	desc->flags = flags;
	desc->states = states;

	/* Fresh bookkeeping state. */
	desc->state = 0;
	desc->dagList = NULL;
	desc->status = 0;
	desc->numRetries = 0;
#if RF_ACC_TRACE > 0
	memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t));
#endif
	desc->callbackFunc = NULL;
	desc->callbackArg = NULL;
	desc->next = NULL;
	desc->iobufs = NULL;
	desc->stripebufs = NULL;

	return (desc);
}
/*
 * Access state: count this access as in flight.
 *
 * Increments raidPtr->accs_in_flight under access_suspend_mutex and
 * advances desc->state.  Always returns RF_FALSE.
 */
int
rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
{
	RF_Raid_t *raidPtr = desc->raidPtr;

	/*
	 * Bummer.  We have to do this to be 100% safe w.r.t. the increment
	 * below.
	 */
	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
	/* Used to detect quiescence. */
	raidPtr->accs_in_flight++;
	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);

	desc->state++;

	return RF_FALSE;
}
/*
 * Return a single common parity log struct to the free list.
 *
 * `common` is pushed onto the head of
 * parityLogDiskQueue.freeCommonList of the array it points to
 * (common->raidPtr), under the disk queue mutex.  NON-BLOCKING
 */
void
rf_FreeParityLogCommonData(RF_CommonLogData_t *common)
{
	RF_Raid_t *raidPtr = common->raidPtr;

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
	raidPtr->parityLogDiskQueue.freeCommonList = common;

	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
}
/*
 * Discard every event pending on the reconstruction event queue.
 *
 * Each event is unlinked from reconControl->eventQueue, eq_count is
 * decremented, and the event is freed.  The whole drain runs with
 * eq_mutex held.
 */
void
rf_DrainReconEventQueue(RF_RaidReconDesc_t *reconDesc)
{
	RF_ReconCtrl_t *rctrl = reconDesc->raidPtr->reconControl;
	RF_ReconEvent_t *ev;

	RF_LOCK_MUTEX(rctrl->eq_mutex);
	for (ev = rctrl->eventQueue; ev != NULL; ev = rctrl->eventQueue) {
		/* Unlink the head... */
		rctrl->eventQueue = ev->next;
		ev->next = NULL;
		rctrl->eq_count--;
		/* ...and dump it. */
		rf_FreeReconEventDesc(ev);
	}
	RF_UNLOCK_MUTEX(rctrl->eq_mutex);
}
/*
 * Grab a log buffer from the pool and return it.  NON-BLOCKING
 *
 * logData - request on whose behalf the log is acquired; supplies the
 *           array (logData->common->raidPtr) and the region ID
 * finish  - when no log is available, selects how logData is queued on
 *           the log-block queue: non-zero uses rf_RequeueParityLogData,
 *           zero uses rf_EnqueueParityLogData
 *
 * Returns the log with regionID set, numRecords zeroed and `next`
 * cleared, or NULL if the pool is empty (in which case logData has been
 * queued to wait for a buffer).
 */
RF_ParityLog_t *
rf_AcquireParityLog(RF_ParityLogData_t *logData, int finish)
{
	RF_ParityLog_t *log = NULL;
	RF_Raid_t *raidPtr = logData->common->raidPtr;

	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
	if (raidPtr->parityLogPool.parityLogs == NULL) {
		/*
		 * No logs available, so place ourselves on the queue of work
		 * waiting on log buffers.  This is done while
		 * parityLogPool.mutex is held, to ensure synchronization with
		 * ReleaseParityLogs.
		 */
		if (rf_parityLogDebug) {
			printf("[blocked on log, region %d, finish %d]\n",
			    logData->regionID, finish);
		}
		if (finish) {
			rf_RequeueParityLogData(logData,
			    &raidPtr->parityLogDiskQueue.logBlockHead,
			    &raidPtr->parityLogDiskQueue.logBlockTail);
		} else {
			rf_EnqueueParityLogData(logData,
			    &raidPtr->parityLogDiskQueue.logBlockHead,
			    &raidPtr->parityLogDiskQueue.logBlockTail);
		}
	} else {
		/* Pop the first log off the pool and reset it. */
		log = raidPtr->parityLogPool.parityLogs;
		raidPtr->parityLogPool.parityLogs = log->next;
		log->regionID = logData->regionID;
		log->numRecords = 0;
		log->next = NULL;
		raidPtr->logsInUse++;
		RF_ASSERT(raidPtr->logsInUse >= 0 &&
		    raidPtr->logsInUse <= raidPtr->numParityLogs);
	}
	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);

	return (log);
}
/*
 * Record that sectors [startSector, stopSector] have been
 * reconstructed.
 *
 * The range is split at reconstruction-unit (RU) boundaries.  For each
 * RU whose status is not already RU_ALL, a list element covering the
 * part of the range inside that RU is inserted into the RU's list,
 * which is kept ordered by startSector, and the entry is then passed to
 * rf_compact_stat_entry().  The whole update runs under mapPtr->mutex.
 *
 * raidPtr     - array, passed through to rf_compact_stat_entry()
 * mapPtr      - reconstruction map to update
 * startSector - first sector of the range
 * stopSector  - last sector of the range (inclusive)
 */
void
rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr,
    RF_SectorNum_t startSector, RF_SectorNum_t stopSector)
{
	RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit;
	RF_SectorNum_t ru, ruFirst, ruLast, chunkEnd;
	RF_ReconMapListElem_t *cur, *prev;

	RF_LOCK_MUTEX(mapPtr->mutex);
	RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk &&
	    stopSector >= startSector);

	while (startSector <= stopSector) {
		ru = startSector / mapPtr->sectorsPerReconUnit;
		ruFirst = ru * sectorsPerReconUnit;
		ruLast = ruFirst + sectorsPerReconUnit - 1;
		/* Last sector of the range that falls inside this RU. */
		chunkEnd = RF_MIN(stopSector, ruLast);

		cur = mapPtr->status[ru];
		if (cur != RU_ALL) {
			if (cur == RU_NOTHING ||
			    cur->startSector > startSector) {
				/* Insert at front of list. */
				mapPtr->status[ru] =
				    rf_MakeReconMapListElem(startSector,
				    chunkEnd,
				    (cur == RU_NOTHING) ? NULL : cur);
			} else {
				/* General case: search for place to insert. */
				prev = cur;
				cur = cur->next;
				while (cur &&
				    (cur->startSector < startSector)) {
					prev = cur;
					cur = cur->next;
				}
				prev->next = rf_MakeReconMapListElem(
				    startSector, chunkEnd, cur);
			}
			rf_update_size(mapPtr, sizeof(RF_ReconMapListElem_t));
			rf_compact_stat_entry(raidPtr, mapPtr, ru);
		}

		startSector = chunkEnd + 1;
	}
	RF_UNLOCK_MUTEX(mapPtr->mutex);
}
/*
 * Access state: this access is no longer in flight.
 *
 * Decrements raidPtr->accs_in_flight under access_suspend_mutex.  If
 * accesses are suspended and this was the last one in flight, the
 * quiescence lock is signalled.  User statistics are updated with the
 * access's timer value and block count, and desc->state is advanced.
 * Always returns RF_FALSE.
 */
int
rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
{
	RF_Raid_t *raidPtr = desc->raidPtr;

	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);

	raidPtr->accs_in_flight--;
	if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
		rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
	}
	rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer),
	    desc->numBlocks);

	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);

	desc->state++;

	return RF_FALSE;
}
/*
 * Force the array into reconfigured mode without doing reconstruction.
 *
 * raidPtr - array to reconfigure; its layout must use distributed
 *           sparing, otherwise this panics
 * col     - column marked rf_ds_dist_spared
 *
 * Always returns 0.
 */
int
rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col)
{
	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) == 0) {
		printf("Can't set reconfigured mode in dedicated-spare array\n");
		RF_PANIC();
	}

	RF_LOCK_MUTEX(raidPtr->mutex);

	raidPtr->numFailures++;
	raidPtr->Disks[col].status = rf_ds_dist_spared;
	raidPtr->status = rf_rs_reconfigured;
	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);

	/*
	 * Install spare table only if declustering + distributed sparing
	 * architecture.
	 */
	if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
		rf_InstallSpareTable(raidPtr, col);

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	return (0);
}
/*
 * Queue a core log to be written to disk.
 *
 * `log` is pushed onto the head of parityLogDiskQueue.flushQueue under
 * the disk queue mutex, and the queue's condition variable is signalled.
 * NON-BLOCKING
 *
 * raidPtr - array owning the parity log disk queue
 * log     - non-NULL log that is full (numRecords equals
 *           raidPtr->numSectorsPerLog) and not linked into any list
 */
void
rf_FlushLog(RF_Raid_t *raidPtr, RF_ParityLog_t *log)
{
	RF_ASSERT(log);
	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
	RF_ASSERT(log->next == NULL);

	/* Move log to flush queue. */
	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	log->next = raidPtr->parityLogDiskQueue.flushQueue;
	raidPtr->parityLogDiskQueue.flushQueue = log;

	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
}
/*
 * Remove and return the in-core parity log at the tail of a disk queue
 * (*head, *tail).  NON-BLOCKING
 *
 * raidPtr     - array owning parityLogDiskQueue.mutex
 * head, tail  - addresses of the queue's head and tail pointers
 * ignoreLocks - non-zero skips taking and releasing the disk queue mutex
 *
 * Returns the removed item with its links cleared, or NULL if the queue
 * was empty.  Items leave from the tail, preserving FIFO order.
 */
RF_ParityLogData_t *
rf_DequeueParityLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
    RF_ParityLogData_t **tail, int ignoreLocks)
{
	RF_ParityLogData_t *data;

	if (!ignoreLocks)
		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	data = *tail;
	if (data != NULL) {
		if (*head == data) {
			/* Removing last item from queue. */
			*head = NULL;
			*tail = NULL;
		} else {
			/* The predecessor becomes the new tail. */
			*tail = data->prev;
			(*tail)->next = NULL;
			RF_ASSERT((*head)->prev == NULL);
			RF_ASSERT((*tail)->next == NULL);
		}

		data->prev = NULL;
		data->next = NULL;

		if (rf_parityLogDebug) {
			printf("[dequeueing parity log data, region %d,"
			    " raidAddress %d, numSector %d]\n",
			    data->regionID,
			    (int) data->diskAddress.raidAddress,
			    (int) data->diskAddress.numSector);
		}
	}

	if (*head) {
		RF_ASSERT((*head)->prev == NULL);
		RF_ASSERT((*tail)->next == NULL);
	}

	if (!ignoreLocks)
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	return (data);
}
/*
 * Access state: stall this access if the array is suspended.
 *
 * When raidPtr->accesses_suspended is non-zero, a callback that will
 * call rf_ContinueRaidAccess(desc) is pushed onto quiesce_wait_list.
 * The time spent here is added to the trace record's suspend overhead,
 * and desc->state is advanced.
 *
 * Returns RF_TRUE if the access was queued to wait, RF_FALSE otherwise.
 */
int
rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
{
	RF_AccTraceEntry_t *tracerec = &desc->tracerec;
	RF_Raid_t *raidPtr = desc->raidPtr;
	RF_CallbackDesc_t *waiter;
	RF_Etimer_t timer;
	int stalled = RF_FALSE;

	RF_ETIMER_START(timer);
	RF_ETIMER_START(desc->timer);

	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
	if (raidPtr->accesses_suspended) {
		waiter = rf_AllocCallbackDesc();
		/*
		 * XXX The following cast is quite bogus...
		 * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *)
		 * as an argument... GO
		 */
		waiter->callbackFunc =
		    (void (*) (RF_CBParam_t)) rf_ContinueRaidAccess;
		waiter->callbackArg.p = (void *) desc;
		waiter->next = raidPtr->quiesce_wait_list;
		raidPtr->quiesce_wait_list = waiter;
		stalled = RF_TRUE;
	}
	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);

	if (stalled && rf_quiesceDebug) {
		printf("Stalling access due to quiescence lock.\n");
	}

	desc->state++;
	return stalled;
}
/*
 * Access state: final state of an access.
 *
 * In order: wakes the caller for non-async requests, unbusies the
 * 'disk', returns an opening to the array and restarts pending I/O,
 * calls biodone() on the buffer, invokes the descriptor's callback (if
 * any) and frees the descriptor.  Always returns RF_FALSE.
 */
int
rf_State_LastState(RF_RaidAccessDesc_t *desc)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) desc->raidPtr;
	void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc;
	RF_CBParam_t callbackArg;

	callbackArg.p = desc->callbackArg;

	/*
	 * If this is not an async request, wake up the caller.
	 */
	if (desc->async_flag == 0) {
		wakeup(desc->bp);
	}

	/*
	 * That's all the IO for this one... Unbusy the 'disk'.
	 */
	rf_disk_unbusy(desc);

	/*
	 * Wakeup any requests waiting to go.
	 */
	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->openings++;
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/* Wake up any pending I/O. */
	raidstart(raidPtr);

	/* printf("%s: Calling biodone on 0x%x.\n", __func__, desc->bp); */
	splassert(IPL_BIO);
	biodone(desc->bp);

	/* Access came through ioctl. */
	if (callbackFunc) {
		callbackFunc(callbackArg);
	}

	rf_FreeRaidAccDesc(desc);

	return RF_FALSE;
}
/*
 * Enqueues a reconstruction event on the array's event queue.
 *
 * A new event descriptor built from (col, arg, type) is pushed onto the
 * head of reconControl->eventQueue under eq_mutex, eq_count is bumped,
 * and sleepers on the queue are woken.
 *
 * raidPtr - array whose reconControl receives the event
 * col     - column the event refers to
 * arg     - opaque argument stored in the event
 * type    - event type; for RF_REVENT_BUFCLEAR, col must not be the
 *           failed column (rctrl->fcol)
 */
void
rf_CauseReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t col, void *arg,
    RF_Revent_t type)
{
	RF_ReconCtrl_t *rctrl = raidPtr->reconControl;
	RF_ReconEvent_t *ev = GetReconEventDesc(col, arg, type);

	if (type == RF_REVENT_BUFCLEAR) {
		RF_ASSERT(col != rctrl->fcol);
	}
	RF_ASSERT(col >= 0 && col <= raidPtr->numCol);

	RF_LOCK_MUTEX(rctrl->eq_mutex);
	/* q null and count==0 must be equivalent conditions */
	RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0));

	ev->next = rctrl->eventQueue;
	rctrl->eventQueue = ev;
	rctrl->eq_count++;
	RF_UNLOCK_MUTEX(rctrl->eq_mutex);

	wakeup(&rctrl->eventQueue);
}
/*
 * Return a linked list of parity log data structs to the free list.
 *
 * Every element reachable from `data` through `next` is pushed, one at
 * a time, onto parityLogDiskQueue.freeDataList of the array found via
 * data->common->raidPtr.  The disk queue mutex is held for the whole
 * walk.  NON-BLOCKING
 */
void
rf_FreeParityLogData(RF_ParityLogData_t *data)
{
	RF_Raid_t *raidPtr = data->common->raidPtr;
	RF_ParityLogData_t *following;

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	for (; data != NULL; data = following) {
		/* Save the successor before relinking this element. */
		following = data->next;
		data->next = raidPtr->parityLogDiskQueue.freeDataList;
		raidPtr->parityLogDiskQueue.freeDataList = data;
	}
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
}
/*
 * Insert an in-core parity log (*data) at the head of a disk queue
 * (*head, *tail).  NON-BLOCKING
 *
 * data       - item to insert; both of its links must be NULL
 * head, tail - addresses of the queue's head and tail pointers
 *
 * The queue is modified under the parityLogDiskQueue mutex of the array
 * found via data->common->raidPtr.
 */
void
rf_EnqueueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head,
    RF_ParityLogData_t **tail)
{
	RF_Raid_t *raidPtr = data->common->raidPtr;

	if (rf_parityLogDebug) {
		printf("[enqueueing parity log data, region %d,"
		    " raidAddress %d, numSector %d]\n", data->regionID,
		    (int) data->diskAddress.raidAddress,
		    (int) data->diskAddress.numSector);
	}
	RF_ASSERT(data->prev == NULL);
	RF_ASSERT(data->next == NULL);

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	if (*head == NULL) {
		/* Insert into empty list. */
		RF_ASSERT(*head == NULL);
		RF_ASSERT(*tail == NULL);
		*head = data;
		*tail = data;
	} else {
		/* Insert into head of queue. */
		RF_ASSERT((*head)->prev == NULL);
		RF_ASSERT((*tail)->next == NULL);
		data->next = *head;
		(*head)->prev = data;
		*head = data;
	}
	RF_ASSERT((*head)->prev == NULL);
	RF_ASSERT((*tail)->next == NULL);
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
}
/*
 * This function is really just for debugging user-level stuff: it
 * frees up all memory, other RAIDframe resources which might otherwise
 * be kept around.  This is used with systems like "sentinel" to detect
 * memory leaks.
 *
 * Returns EBUSY while any configuration is still active
 * (configureCount non-zero), otherwise 0.  Panics if configureMutex
 * cannot be destroyed.
 */
int
rf_UnbootRaidframe()
{
	int rc;

	RF_LOCK_MUTEX(configureMutex);
	if (configureCount) {
		RF_UNLOCK_MUTEX(configureMutex);
		return (EBUSY);
	}
	raidframe_booted = 0;
	RF_UNLOCK_MUTEX(configureMutex);

	rc = rf_mutex_destroy(&configureMutex);
	if (rc) {
		RF_ERRORMSG3("Unable to destroy mutex file %s line %d rc=%d\n",
		    __FILE__, __LINE__, rc);
		RF_PANIC();
	}

#if RF_DEBUG_ATOMIC > 0
	rf_atent_shutdown();
#endif /* RF_DEBUG_ATOMIC > 0 */

	return (0);
}
/*
 * Release a RAID access descriptor and everything hanging off it.
 *
 * Frees the descriptor's DAG lists, I/O buffers and stripe buffers,
 * returns the descriptor to the rf_pools.rad pool, and decrements the
 * array's outstanding-access count.  If the array is waiting to shut
 * down, outstandingCond is signalled.
 *
 * desc - descriptor to free; must be non-NULL
 */
void
rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc)
{
	RF_Raid_t *raidPtr;
	RF_DagList_t *dagList, *temp;
	RF_VoidPointerListElem_t *tmp;

	/* Check the pointer before the first dereference, not after it. */
	RF_ASSERT(desc);
	raidPtr = desc->raidPtr;

	/* Cleanup the dagList(s) */
	dagList = desc->dagList;
	while (dagList != NULL) {
		temp = dagList;
		dagList = dagList->next;
		rf_FreeDAGList(temp);
	}

	while (desc->iobufs) {
		tmp = desc->iobufs;
		desc->iobufs = desc->iobufs->next;
		rf_FreeIOBuffer(raidPtr, tmp);
	}

	while (desc->stripebufs) {
		tmp = desc->stripebufs;
		desc->stripebufs = desc->stripebufs->next;
		rf_FreeStripeBuffer(raidPtr, tmp);
	}

	/* desc must not be touched after this point; raidPtr was saved. */
	pool_put(&rf_pools.rad, desc);

	RF_LOCK_MUTEX(rf_rad_lock);
	raidPtr->nAccOutstanding--;
	if (raidPtr->waitShutdown) {
		RF_SIGNAL_COND(raidPtr->outstandingCond);
	}
	RF_UNLOCK_MUTEX(rf_rad_lock);
}
/*
 * Do a complete copyback.
 *
 * Finds the first disk whose status is rf_ds_dist_spared or
 * rf_ds_spared, (re-)opens its device, refreshes its size information,
 * quiesces the array, marks the disk and its row optimal, runs the
 * copyback via rf_ContinueCopyback() and finally rewrites the
 * component label of the restored disk.
 *
 * Returns early, after printing a message where the code has one, if no
 * disk needs copyback or if the device cannot be looked up or queried.
 */
void
rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t c_label;
	int found, retcode;
	RF_CopybackDesc_t *desc;
	RF_RowCol_t frow, fcol;
	RF_RaidDisk_t *badDisk;
	char *databuf;
	struct partinfo dpart;
	struct vnode *vp;
	struct vattr va;
	struct proc *proc;
	int ac;

	/* Locate the first spared disk; frow/fcol are left pointing at it. */
	found = 0;
	fcol = 0;
	for (frow = 0; frow < raidPtr->numRow; frow++) {
		for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
			if (raidPtr->Disks[frow][fcol].status ==
			    rf_ds_dist_spared ||
			    raidPtr->Disks[frow][fcol].status ==
			    rf_ds_spared) {
				found = 1;
				break;
			}
		}
		if (found)
			break;
	}

	if (!found) {
		printf("COPYBACK: No disks need copyback.\n");
		return;
	}

	badDisk = &raidPtr->Disks[frow][fcol];
	proc = raidPtr->engine_thread;

	/*
	 * This device may have been opened successfully the first time.
	 * Close it before trying to open it again.
	 */
	if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
		printf("Close the opened device: %s.\n", badDisk->devname);
		vp = raidPtr->raid_cinfo[frow][fcol].ci_vp;
		ac = badDisk->auto_configured;
		rf_close_component(raidPtr, vp, ac);
		raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;
	}
	/* Note that this disk was *not* auto_configured (any longer). */
	badDisk->auto_configured = 0;

	printf("About to (re-)open the device: %s.\n", badDisk->devname);

	retcode = raidlookup(badDisk->devname, proc, &vp);
	if (retcode) {
		printf("COPYBACK: raidlookup on device: %s failed: %d !\n",
		    badDisk->devname, retcode);
		/*
		 * XXX The component isn't responding properly... Must be
		 * still dead :-(
		 */
		return;
	}

	/*
	 * Ok, so we can at least do a lookup...
	 * How about actually getting a vp for it ?
	 */
	retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc);
	if (retcode != 0) {
		return;
	}
	retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) &dpart, FREAD,
	    proc->p_ucred, proc);
	if (retcode) {
		return;
	}

	badDisk->blockSize = dpart.disklab->d_secsize;
	badDisk->numBlocks = dpart.part->p_size - rf_protectedSectors;

	raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
	raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;

	/* XXX Or the above ? */
	badDisk->dev = va.va_rdev;

	/*
	 * We allow the user to specify that only a fraction of the
	 * disks should be used; this is just for debug: it speeds up
	 * the parity scan.
	 */
	badDisk->numBlocks = badDisk->numBlocks * rf_sizePercentage / 100;

#if 0
	/* This is the way it was done before the CAM stuff was removed. */

	if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
		printf("COPYBACK: unable to extract bus, target, lun from"
		    " devname %s.\n", badDisk->devname);
		return;
	}

	/*
	 * TUR the disk that's marked as bad to be sure that it's actually
	 * alive.
	 */
	rf_SCSI_AllocTUR(&tur_op);
	retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
	rf_SCSI_FreeDiskOp(tur_op, 0);
#endif

	if (retcode) {
		printf("COPYBACK: target disk failed TUR.\n");
		return;
	}

	/* Get a buffer to hold one SU. */
	RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr,
	    raidPtr->Layout.sectorsPerStripeUnit), (char *));

	/* Create a descriptor. */
	RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
	desc->raidPtr = raidPtr;
	desc->status = 0;
	desc->frow = frow;
	desc->fcol = fcol;
	desc->spRow = badDisk->spareRow;
	desc->spCol = badDisk->spareCol;
	desc->stripeAddr = 0;
	desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit *
	    raidPtr->Layout.numDataCol;
	desc->databuf = databuf;
	desc->mcpair = rf_AllocMCPair();

	printf("COPYBACK: Quiescing the array.\n");
	/*
	 * Quiesce the array, since we don't want to code support for user
	 * accs here.
	 */
	rf_SuspendNewRequestsAndWait(raidPtr);

	/* Adjust state of the array and of the disks. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
	raidPtr->status[desc->frow] = rf_rs_optimal;
	rf_copyback_in_progress = 1;	/* Debug only. */
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	printf("COPYBACK: Beginning\n");
	RF_GETTIME(desc->starttime);
	rf_ContinueCopyback(desc);

	/*
	 * Data has been restored.
	 * Fix up the component label.
	 * Don't actually need the read here.
	 * Only frow/fcol are used from here on; desc is not touched again.
	 */
	raidread_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
	    raidPtr->raid_cinfo[frow][fcol].ci_vp, &c_label);

	raid_init_component_label(raidPtr, &c_label);

	c_label.row = frow;
	c_label.column = fcol;

	raidwrite_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
	    raidPtr->raid_cinfo[frow][fcol].ci_vp, &c_label);
}