/* KeepAlive * * While dumping the database, keeps the connection alive. * Every 10 seconds, wake up and ask to read 0 bytes of the database. * This resets the database's internal timer so that it does not * prematuraly quit (on asking for new tapes and such). * * Use the same udbHandle as writeDbDump so we go to the same server. */ void * KeepAlive(void *unused) { charListT charList; afs_int32 code; afs_int32 done; extern struct udbHandleS udbHandle; afs_pthread_setname_self("Keep-alive"); while (1) { #ifdef AFS_PTHREAD_ENV sleep(5); #else IOMGR_Sleep(5); #endif charList.charListT_val = 0; charList.charListT_len = 0; code = ubik_Call_SingleServer(BUDB_DumpDB, udbHandle.uh_client, UF_SINGLESERVER, 0, 0, &charList, &done); if (code || done) break; } return 0; }
/* background daemon for timing out transactions */ static void* BKGLoop(void *unused) { struct timeval tv; int loop = 0; afs_pthread_setname_self("vol bkg"); while (1) { tv.tv_sec = GCWAKEUP; tv.tv_usec = 0; #ifdef AFS_PTHREAD_ENV #ifdef AFS_NT40_ENV Sleep(GCWAKEUP * 1000); #else select(0, 0, 0, 0, &tv); #endif #else (void)IOMGR_Select(0, 0, 0, 0, &tv); #endif GCTrans(); TryUnlock(); loop++; if (loop == 10) { /* reopen log every 5 minutes */ loop = 0; ReOpenLog(); } } AFS_UNREACHED(return(NULL)); }
/*
 * Thread body that periodically calls ih_sync_all().
 *
 * Runs forever: sleeps (10 seconds in pthread builds, 60 seconds
 * otherwise) and then calls ih_sync_all(). The argument is not used.
 */
void *
ih_sync_thread(void *dummy)
{
    afs_pthread_setname_self("ih_syncer");

    for (;;) {
#ifndef AFS_PTHREAD_ENV
        IOMGR_Sleep(60);
#else /* AFS_PTHREAD_ENV */
        sleep(10);
#endif /* AFS_PTHREAD_ENV */

        ih_sync_all();
    }

    return NULL;
}
/*! * \brief Main interaction loop for the recovery manager * * The recovery light-weight process only runs when you're the * synchronization site. It performs the following tasks, if and only * if the prerequisite tasks have been performed successfully (it * keeps track of which ones have been performed in its bit map, * \p urecovery_state). * * First, it is responsible for probing that all servers are up. This * is the only operation that must be performed even if this is not * yet the sync site, since otherwise this site may not notice that * enough other machines are running to even elect this guy to be the * sync site. * * After that, the recovery process does nothing until the beacon and * voting modules manage to get this site elected sync site. * * After becoming sync site, recovery first attempts to find the best * database available in the network (it must do this in order to * ensure finding the latest committed data). After finding the right * database, it must fetch this dbase to the sync site. * * After fetching the dbase, it relabels it with a new version number, * to ensure that everyone recognizes this dbase as the most recent * dbase. * * One the dbase has been relabelled, this machine can start handling * requests. However, the recovery module still has one more task: * propagating the dbase out to everyone who is up in the network. 
*/ void * urecovery_Interact(void *dummy) { afs_int32 code, tcode; struct ubik_server *bestServer = NULL; struct ubik_server *ts; int dbok, doingRPC, now; afs_int32 lastProbeTime; /* if we're the sync site, the best db version we've found yet */ static struct ubik_version bestDBVersion; struct ubik_version tversion; struct timeval tv; int length, tlen, offset, file, nbytes; struct rx_call *rxcall; char tbuffer[1024]; struct ubik_stat ubikstat; struct in_addr inAddr; char hoststr[16]; char pbuffer[1028]; int fd = -1; afs_int32 pass; afs_pthread_setname_self("recovery"); /* otherwise, begin interaction */ urecovery_state = 0; lastProbeTime = 0; while (1) { /* Run through this loop every 4 seconds */ tv.tv_sec = 4; tv.tv_usec = 0; #ifdef AFS_PTHREAD_ENV select(0, 0, 0, 0, &tv); #else IOMGR_Select(0, 0, 0, 0, &tv); #endif ubik_dprint("recovery running in state %x\n", urecovery_state); /* Every 30 seconds, check all the down servers and mark them * as up if they respond. When a server comes up or found to * not be current, then re-find the the best database and * propogate it. */ if ((now = FT_ApproxTime()) > 30 + lastProbeTime) { for (ts = ubik_servers, doingRPC = 0; ts; ts = ts->next) { UBIK_BEACON_LOCK; if (!ts->up) { UBIK_BEACON_UNLOCK; doingRPC = 1; code = DoProbe(ts); if (code == 0) { UBIK_BEACON_LOCK; ts->up = 1; UBIK_BEACON_UNLOCK; DBHOLD(ubik_dbase); urecovery_state &= ~UBIK_RECFOUNDDB; DBRELE(ubik_dbase); } } else { UBIK_BEACON_UNLOCK; DBHOLD(ubik_dbase); if (!ts->currentDB) urecovery_state &= ~UBIK_RECFOUNDDB; DBRELE(ubik_dbase); } } if (doingRPC) now = FT_ApproxTime(); lastProbeTime = now; } /* Mark whether we are the sync site */ DBHOLD(ubik_dbase); if (!ubeacon_AmSyncSite()) { urecovery_state &= ~UBIK_RECSYNCSITE; DBRELE(ubik_dbase); continue; /* nothing to do */ } urecovery_state |= UBIK_RECSYNCSITE; /* If a server has just come up or if we have not found the * most current database, then go find the most current db. 
*/ if (!(urecovery_state & UBIK_RECFOUNDDB)) { DBRELE(ubik_dbase); bestServer = (struct ubik_server *)0; bestDBVersion.epoch = 0; bestDBVersion.counter = 0; for (ts = ubik_servers; ts; ts = ts->next) { UBIK_BEACON_LOCK; if (!ts->up) { UBIK_BEACON_UNLOCK; continue; /* don't bother with these guys */ } UBIK_BEACON_UNLOCK; if (ts->isClone) continue; UBIK_ADDR_LOCK; code = DISK_GetVersion(ts->disk_rxcid, &ts->version); UBIK_ADDR_UNLOCK; if (code == 0) { /* perhaps this is the best version */ if (vcmp(ts->version, bestDBVersion) > 0) { /* new best version */ bestDBVersion = ts->version; bestServer = ts; } } } /* take into consideration our version. Remember if we, * the sync site, have the best version. Also note that * we may need to send the best version out. */ DBHOLD(ubik_dbase); if (vcmp(ubik_dbase->version, bestDBVersion) >= 0) { bestDBVersion = ubik_dbase->version; bestServer = (struct ubik_server *)0; urecovery_state |= UBIK_RECHAVEDB; } else { /* Clear the flag only when we know we have to retrieve * the db. Because urecovery_AllBetter() looks at it. */ urecovery_state &= ~UBIK_RECHAVEDB; } urecovery_state |= UBIK_RECFOUNDDB; urecovery_state &= ~UBIK_RECSENTDB; } if (!(urecovery_state & UBIK_RECFOUNDDB)) { DBRELE(ubik_dbase); continue; /* not ready */ } /* If we, the sync site, do not have the best db version, then * go and get it from the server that does. */ if ((urecovery_state & UBIK_RECHAVEDB) || !bestServer) { urecovery_state |= UBIK_RECHAVEDB; } else { /* we don't have the best version; we should fetch it. 
*/ urecovery_AbortAll(ubik_dbase); /* Rx code to do the Bulk fetch */ file = 0; offset = 0; UBIK_ADDR_LOCK; rxcall = rx_NewCall(bestServer->disk_rxcid); ubik_print("Ubik: Synchronize database with server %s\n", afs_inet_ntoa_r(bestServer->addr[0], hoststr)); UBIK_ADDR_UNLOCK; code = StartDISK_GetFile(rxcall, file); if (code) { ubik_dprint("StartDiskGetFile failed=%d\n", code); goto FetchEndCall; } nbytes = rx_Read(rxcall, (char *)&length, sizeof(afs_int32)); length = ntohl(length); if (nbytes != sizeof(afs_int32)) { ubik_dprint("Rx-read length error=%d\n", code = BULK_ERROR); code = EIO; goto FetchEndCall; } /* give invalid label during file transit */ UBIK_VERSION_LOCK; tversion.epoch = 0; code = (*ubik_dbase->setlabel) (ubik_dbase, file, &tversion); UBIK_VERSION_UNLOCK; if (code) { ubik_dprint("setlabel io error=%d\n", code); goto FetchEndCall; } snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d.TMP", ubik_dbase->pathName, (file<0)?"SYS":"", (file<0)?-file:file); fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600); if (fd < 0) { code = errno; goto FetchEndCall; } code = lseek(fd, HDRSIZE, 0); if (code != HDRSIZE) { close(fd); goto FetchEndCall; } pass = 0; while (length > 0) { tlen = (length > sizeof(tbuffer) ? 
sizeof(tbuffer) : length); #ifndef AFS_PTHREAD_ENV if (pass % 4 == 0) IOMGR_Poll(); #endif nbytes = rx_Read(rxcall, tbuffer, tlen); if (nbytes != tlen) { ubik_dprint("Rx-read bulk error=%d\n", code = BULK_ERROR); code = EIO; close(fd); goto FetchEndCall; } nbytes = write(fd, tbuffer, tlen); pass++; if (nbytes != tlen) { code = UIOERROR; close(fd); goto FetchEndCall; } offset += tlen; length -= tlen; } code = close(fd); if (code) goto FetchEndCall; code = EndDISK_GetFile(rxcall, &tversion); FetchEndCall: tcode = rx_EndCall(rxcall, code); if (!code) code = tcode; if (!code) { /* we got a new file, set up its header */ urecovery_state |= UBIK_RECHAVEDB; UBIK_VERSION_LOCK; memcpy(&ubik_dbase->version, &tversion, sizeof(struct ubik_version)); snprintf(tbuffer, sizeof(tbuffer), "%s.DB%s%d", ubik_dbase->pathName, (file<0)?"SYS":"", (file<0)?-file:file); #ifdef AFS_NT40_ENV snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d.OLD", ubik_dbase->pathName, (file<0)?"SYS":"", (file<0)?-file:file); code = unlink(pbuffer); if (!code) code = rename(tbuffer, pbuffer); snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d.TMP", ubik_dbase->pathName, (file<0)?"SYS":"", (file<0)?-file:file); #endif if (!code) code = rename(pbuffer, tbuffer); if (!code) { (*ubik_dbase->open) (ubik_dbase, file); /* after data is good, sync disk with correct label */ code = (*ubik_dbase->setlabel) (ubik_dbase, 0, &ubik_dbase->version); } UBIK_VERSION_UNLOCK; #ifdef AFS_NT40_ENV snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d.OLD", ubik_dbase->pathName, (file<0)?"SYS":"", (file<0)?-file:file); unlink(pbuffer); #endif } if (code) { unlink(pbuffer); /* * We will effectively invalidate the old data forever now. * Unclear if we *should* but we do. 
*/ UBIK_VERSION_LOCK; ubik_dbase->version.epoch = 0; ubik_dbase->version.counter = 0; UBIK_VERSION_UNLOCK; ubik_print("Ubik: Synchronize database failed (error = %d)\n", code); } else { ubik_print("Ubik: Synchronize database completed\n"); urecovery_state |= UBIK_RECHAVEDB; } udisk_Invalidate(ubik_dbase, 0); /* data has changed */ #ifdef AFS_PTHREAD_ENV CV_BROADCAST(&ubik_dbase->version_cond); #else LWP_NoYieldSignal(&ubik_dbase->version); #endif } if (!(urecovery_state & UBIK_RECHAVEDB)) { DBRELE(ubik_dbase); continue; /* not ready */ } /* If the database was newly initialized, then when we establish quorum, write * a new label. This allows urecovery_AllBetter() to allow access for reads. * Setting it to 2 also allows another site to come along with a newer * database and overwrite this one. */ if (ubik_dbase->version.epoch == 1) { urecovery_AbortAll(ubik_dbase); UBIK_VERSION_LOCK; version_globals.ubik_epochTime = 2; ubik_dbase->version.epoch = version_globals.ubik_epochTime; ubik_dbase->version.counter = 1; code = (*ubik_dbase->setlabel) (ubik_dbase, 0, &ubik_dbase->version); UBIK_VERSION_UNLOCK; udisk_Invalidate(ubik_dbase, 0); /* data may have changed */ #ifdef AFS_PTHREAD_ENV CV_BROADCAST(&ubik_dbase->version_cond); #else LWP_NoYieldSignal(&ubik_dbase->version); #endif } /* Check the other sites and send the database to them if they * do not have the current db. */ if (!(urecovery_state & UBIK_RECSENTDB)) { /* now propagate out new version to everyone else */ dbok = 1; /* start off assuming they all worked */ /* * Check if a write transaction is in progress. We can't send the * db when a write is in progress here because the db would be * obsolete as soon as it goes there. Also, ops after the begin * trans would reach the recepient and wouldn't find a transaction * pending there. Frankly, I don't think it's possible to get past * the write-lock above if there is a write transaction in progress, * but then, it won't hurt to check, will it? 
*/ if (ubik_dbase->flags & DBWRITING) { struct timeval tv; int safety = 0; long cur_usec = 50000; while ((ubik_dbase->flags & DBWRITING) && (safety < 500)) { DBRELE(ubik_dbase); /* sleep for a little while */ tv.tv_sec = 0; tv.tv_usec = cur_usec; #ifdef AFS_PTHREAD_ENV select(0, 0, 0, 0, &tv); #else IOMGR_Select(0, 0, 0, 0, &tv); #endif cur_usec += 10000; safety++; DBHOLD(ubik_dbase); } } for (ts = ubik_servers; ts; ts = ts->next) { UBIK_ADDR_LOCK; inAddr.s_addr = ts->addr[0]; UBIK_ADDR_UNLOCK; UBIK_BEACON_LOCK; if (!ts->up) { UBIK_BEACON_UNLOCK; ubik_dprint("recovery cannot send version to %s\n", afs_inet_ntoa_r(inAddr.s_addr, hoststr)); dbok = 0; continue; } UBIK_BEACON_UNLOCK; ubik_dprint("recovery sending version to %s\n", afs_inet_ntoa_r(inAddr.s_addr, hoststr)); if (vcmp(ts->version, ubik_dbase->version) != 0) { ubik_dprint("recovery stating local database\n"); /* Rx code to do the Bulk Store */ code = (*ubik_dbase->stat) (ubik_dbase, 0, &ubikstat); if (!code) { length = ubikstat.size; file = offset = 0; UBIK_ADDR_LOCK; rxcall = rx_NewCall(ts->disk_rxcid); UBIK_ADDR_UNLOCK; code = StartDISK_SendFile(rxcall, file, length, &ubik_dbase->version); if (code) { ubik_dprint("StartDiskSendFile failed=%d\n", code); goto StoreEndCall; } while (length > 0) { tlen = (length > sizeof(tbuffer) ? 
sizeof(tbuffer) : length); nbytes = (*ubik_dbase->read) (ubik_dbase, file, tbuffer, offset, tlen); if (nbytes != tlen) { ubik_dprint("Local disk read error=%d\n", code = UIOERROR); goto StoreEndCall; } nbytes = rx_Write(rxcall, tbuffer, tlen); if (nbytes != tlen) { ubik_dprint("Rx-write bulk error=%d\n", code = BULK_ERROR); goto StoreEndCall; } offset += tlen; length -= tlen; } code = EndDISK_SendFile(rxcall); StoreEndCall: code = rx_EndCall(rxcall, code); } if (code == 0) { /* we set a new file, process its header */ ts->version = ubik_dbase->version; ts->currentDB = 1; } else dbok = 0; } else { /* mark file up to date */ ts->currentDB = 1; } } if (dbok) urecovery_state |= UBIK_RECSENTDB; } DBRELE(ubik_dbase); } return NULL; }
void * saveDbToTape(void *param) { struct saveDbIf *saveDbIfPtr = (struct saveDbIf *)param; afs_int32 code; afs_int32 i; int wroteLabel; afs_uint32 taskId; Date expires; struct butm_tapeInfo tapeInfo; struct budb_dumpEntry dumpEntry; extern struct deviceSyncNode *deviceLatch; extern struct tapeConfig globalTapeConfig; afs_pthread_setname_self("Db save"); expires = (saveDbIfPtr->archiveTime ? NEVERDATE : 0); taskId = saveDbIfPtr->taskId; dumpEntry.id = 0; setStatus(taskId, DRIVE_WAIT); EnterDeviceQueue(deviceLatch); /* lock tape device */ clearStatus(taskId, DRIVE_WAIT); printf("\n\n"); TLog(taskId, "SaveDb\n"); tapeInfo.structVersion = BUTM_MAJORVERSION; code = butm_file_Instantiate(&tapeInfo, &globalTapeConfig); if (code) { ErrorLog(0, taskId, code, tapeInfo.error, "Can't initialize tape module\n"); ERROR_EXIT(code); } /* Determine what the last database dump was */ memset(&lastDump, 0, sizeof(lastDump)); code = bcdb_FindLatestDump("", "", &lastDump); if (code) { if (code != BUDB_NODUMPNAME) { ErrorLog(0, taskId, code, 0, "Can't read backup database\n"); ERROR_EXIT(code); } memset(&lastDump, 0, sizeof(lastDump)); } code = CreateDBDump(&dumpEntry); /* Create a dump for this tape */ if (code) { ErrorLog(0, taskId, code, 0, "Can't create dump in database\n"); ERROR_EXIT(code); } listEntryHead = NULL; /* Get the tape and write a new label to it */ code = GetDBTape(taskId, expires, &tapeInfo, dumpEntry.id, 1, autoQuery, &wroteLabel); /* * If did not write the label, remove created dump * Else if wrote the label, remove old dump from db so it's not saved. 
*/ if (!wroteLabel) { i = bcdb_deleteDump(dumpEntry.id, 0, 0, 0); dumpEntry.id = 0; if (i && (i != BUDB_NOENT)) ErrorLog(0, taskId, i, 0, "Unable to delete DB entry %u.\n", dumpEntry.id); } else if (listEntryHead->oldDumpId) { i = bcdb_deleteDump(listEntryHead->oldDumpId, 0, 0, 0); listEntryHead->oldDumpId = 0; if (i && (i != BUDB_NOENT)) { ErrorLog(0, taskId, i, 0, "Unable to delete old DB entry %u.\n", listEntryHead->oldDumpId); ERROR_EXIT(i); } } if (code) ERROR_EXIT(code); TapeLog(1, taskId, 0, 0, "Tape accepted - now dumping database\n"); /* we have a writable tape */ code = writeDbDump(&tapeInfo, taskId, expires, dumpEntry.id); if (code) ERROR_EXIT(code); /* Now delete the entries between time 0 and archive-time */ if (saveDbIfPtr->archiveTime) code = bcdb_deleteDump(0, 0, saveDbIfPtr->archiveTime, 0); error_exit: unmountTape(taskId, &tapeInfo); /* Add this dump's tapes to the database and mark it finished */ if (dumpEntry.id) { i = addTapesToDb(taskId); if (!code) code = i; i = bcdb_FinishDump(&dumpEntry); if (!code) code = i; } freeTapeList(); if (code == TC_ABORTEDBYREQUEST) { TLog(taskId, "SaveDb: Aborted by request\n"); clearStatus(taskId, ABORT_REQUEST); setStatus(taskId, ABORT_DONE); } else if (code) { TapeLog(0, taskId, code, 0, "SaveDb: Finished with errors\n"); setStatus(taskId, TASK_ERROR); } else { TLog(taskId, "SaveDb: Finished\n"); } setStatus(taskId, TASK_DONE); free(saveDbIfPtr); LeaveDeviceQueue(deviceLatch); return (void *)(intptr_t)(code); }
void * restoreDbFromTape(void *param) { afs_uint32 taskId = (intptr_t) param; afs_int32 code = 0; afs_int32 i; struct butm_tapeInfo tapeInfo; struct rstTapeInfo rstTapeInfo; struct budb_dumpEntry dumpEntry; extern struct tapeConfig globalTapeConfig; extern struct deviceSyncNode *deviceLatch; afs_pthread_setname_self("Db restore"); setStatus(taskId, DRIVE_WAIT); EnterDeviceQueue(deviceLatch); /* lock tape device */ clearStatus(taskId, DRIVE_WAIT); printf("\n\n"); TLog(taskId, "RestoreDb\n"); tapeInfo.structVersion = BUTM_MAJORVERSION; code = butm_file_Instantiate(&tapeInfo, &globalTapeConfig); if (code) { ErrorLog(0, taskId, code, tapeInfo.error, "Can't initialize tape module\n"); ERROR_EXIT(code); } listEntryHead = NULL; rstTapeInfo.taskId = taskId; rstTapeInfo.tapeSeq = 1; rstTapeInfo.dumpid = 0; code = readDbTape(&tapeInfo, &rstTapeInfo, autoQuery); if (code) ERROR_EXIT(code); code = restoreDbEntries(&tapeInfo, &rstTapeInfo); if (code) ERROR_EXIT(code); error_exit: /* Now put this dump into the database */ /* Make a dump entry from first tape */ listEntryPtr = listEntryHead; if (listEntryPtr) { makeDbDumpEntry(tapeEntryPtr, &dumpEntry); if (dumpEntry.id != 0) { i = bcdb_CreateDump(&dumpEntry); if (i) { if (i == BUDB_DUMPIDEXISTS) fprintf(stderr, "Dump id %d not added to database - already exists\n", dumpEntry.id); else TapeLog(0, taskId, i, 0, "Dump id %d not added to database\n", dumpEntry.id); } else { i = addTapesToDb(taskId); if (!code) code = i; i = bcdb_FinishDump(&dumpEntry); if (!code) code = i; } } freeTapeList(); } unmountTape(taskId, &tapeInfo); waitDbWatcher(); if (code == TC_ABORTEDBYREQUEST) { TLog(taskId, "RestoreDb: Aborted by request\n"); clearStatus(taskId, ABORT_REQUEST); setStatus(taskId, ABORT_DONE); } else if (code) { TapeLog(0, taskId, code, 0, "RestoreDb: Finished with errors\n"); setStatus(taskId, TASK_ERROR); } else { TLog(taskId, "RestoreDb: Finished\n"); } LeaveDeviceQueue(deviceLatch); setStatus(taskId, TASK_DONE); return (void 
*)(intptr_t)(code); }