// Scenario: write data, roll back to an empty snapshot, write different data
// that is only flushed to the failover cache, then drop the volume together
// with its local data and restart it from the saved config.
// The restarted volume must no longer show the pre-restore writes at LBA 0
// and must show the post-restore writes at LBA 8.
// NOTE(review): the post-restore data is presumably replayed from the
// failover cache on restart -- confirm against restartVolume().
TEST_P(SnapshotRestoreTest, TestFailOver)
{
    auto failover_ctx(start_one_foc());
    auto nspace = make_random_namespace();
    const VolumeId vid("volume1");

    SharedVolumePtr v = newVolume(vid,
                                  nspace->ns(),
                                  VolumeSize((1 << 18) * 512),
                                  SCOMultiplier(1));

    v->setFailOverCacheConfig(failover_ctx->config(GetParam().foc_mode()));

    // Saved now so the volume can be restarted after it has been destroyed.
    VolumeConfig saved_config = v->get_config();

    v->createSnapshot(SnapshotName("snap0"));

    // Data written after snap0; the restore below is expected to discard it.
    for (int round = 0; round != 5; ++round)
    {
        writeToVolume(*v, 0, 4096, "a");
    }

    waitForThisBackendWrite(*v);
    v->restoreSnapshot(SnapshotName("snap0"));

    // Data written after the restore; only the failover cache is flushed.
    for (int round = 0; round != 7; ++round)
    {
        writeToVolume(*v, 8, 4096, "d");
    }

    flushFailOverCache(*v);
    destroyVolume(v, DeleteLocalData::T, RemoveVolumeCompletely::F);

    // The volume must be gone until it is explicitly restarted.
    SharedVolumePtr v1 = getVolume(vid);
    ASSERT_FALSE(v1);

    restartVolume(saved_config);
    v1 = getVolume(vid);
    ASSERT_TRUE(v1 != nullptr);

    checkVolume(*v1,0,4096, "\0");
    checkVolume(*v1,8,4096, "d");
    checkCurrentBackendSize(*v1);
}
// Scenario: take an empty snapshot, write and snapshot again, restore the
// first snapshot and then write a larger range than before. Finishes by
// checking the current backend size of the volume.
TEST_P(SnapshotRestoreTest, RestoreAndWriteAgain2)
{
    auto nspace = make_random_namespace();

    SharedVolumePtr v = newVolume(VolumeId("volume1"),
                                  nspace->ns(),
                                  VolumeSize(1 << 26),
                                  SCOMultiplier(1));

    const std::string fill("e-manual");

    // snap1 is taken before any data has been written.
    v->createSnapshot(SnapshotName("snap1"));
    waitForThisBackendWrite(*v);

    // snap2 covers the first five 4096-byte chunks.
    writeToVolume(*v, 0, 5 * 4096, fill);
    v->createSnapshot(SnapshotName("snap2"));
    waitForThisBackendWrite(*v);

    // Roll back to snap1 and write twice as much as before.
    restoreSnapshot(*v,"snap1");
    writeToVolume(*v, 0, 10*4096, fill);
    waitForThisBackendWrite(*v);

    checkCurrentBackendSize(*v);
}
// Checks which restore failures halt the volume:
//  - restoring a snapshot name that was never created throws but leaves the
//    volume running;
//  - restoring an existing snapshot after the backend object named after the
//    initial TLog id has been removed throws and halts the volume.
TEST_P(SnapshotRestoreTest, HaltOnError)
{
    auto nspace = make_random_namespace();
    SharedVolumePtr v = newVolume(VolumeId("volume1"),
                                  nspace->ns());

    const std::string data("blah");

    // Must be read before the write/snapshot below so that it identifies the
    // TLog that was current when the volume was created.
    const TLogId first_tlog(v->getSnapshotManagement().getCurrentTLogId());

    writeToVolume(*v, 0, 4096, data);
    v->createSnapshot(SnapshotName("snap1"));
    waitForThisBackendWrite(*v);

    // Unknown snapshot: an error, but not one that halts the volume.
    EXPECT_THROW(restoreSnapshot(*v, "snap42"), std::exception);
    EXPECT_FALSE(v->is_halted());

    // Remove the backend object whose name is the string form of the TLog id.
    v->getBackendInterface()->remove(boost::lexical_cast<std::string>(first_tlog));

    // Known snapshot, but the restore can no longer succeed: volume halts.
    EXPECT_THROW(restoreSnapshot(*v, "snap1"), std::exception);
    EXPECT_TRUE(v->is_halted());
}
/**
 * Publishes the timestamp of 'newCommitPoint' as the committed snapshot on the
 * global storage engine's SnapshotManager.
 *
 * A SnapshotManager must exist; this is enforced with an invariant.
 */
void ReplicationCoordinatorExternalStateImpl::updateCommittedSnapshot(OpTime newCommitPoint) {
    auto storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
    auto manager = storageEngine->getSnapshotManager();

    // Callers must not reach this point when there is no SnapshotManager.
    invariant(manager);

    const auto committedTimestamp = newCommitPoint.getTimestamp();
    manager->setCommittedSnapshot(SnapshotName(committedTimestamp));
}
void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try { if (_consistencyMarkers->getInitialSyncFlag(opCtx)) { log() << "No recovery needed. Initial sync flag set."; return; // Initial Sync will take over so no cleanup is needed. } const auto truncateAfterPoint = _consistencyMarkers->getOplogTruncateAfterPoint(opCtx); const auto appliedThrough = _consistencyMarkers->getAppliedThrough(opCtx); if (!truncateAfterPoint.isNull()) { log() << "Removing unapplied entries starting at: " << truncateAfterPoint.toBSON(); _truncateOplogTo(opCtx, truncateAfterPoint); } // Clear the truncateAfterPoint so that we don't truncate the next batch of oplog entries // erroneously. _consistencyMarkers->setOplogTruncateAfterPoint(opCtx, {}); // TODO (SERVER-30556): Delete this line since the old oplog delete from point cannot exist. _consistencyMarkers->removeOldOplogDeleteFromPointField(opCtx); auto topOfOplogSW = _getLastAppliedOpTime(opCtx); boost::optional<OpTime> topOfOplog = boost::none; if (topOfOplogSW.getStatus() != ErrorCodes::CollectionIsEmpty && topOfOplogSW.getStatus() != ErrorCodes::NamespaceNotFound) { fassertStatusOK(40290, topOfOplogSW); topOfOplog = topOfOplogSW.getValue(); } // If we have a checkpoint timestamp, then we recovered to a timestamp and should set the // initial data timestamp to that. Otherwise, we simply recovered the data on disk so we should // set the initial data timestamp to the top OpTime in the oplog once the data is consistent // there. If there is nothing in the oplog, then we do not set the initial data timestamp. auto checkpointTimestamp = _consistencyMarkers->getCheckpointTimestamp(opCtx); if (!checkpointTimestamp.isNull()) { // If we have a checkpoint timestamp, we set the initial data timestamp now so that // the operations we apply below can be given the proper timestamps. _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(), SnapshotName(checkpointTimestamp)); } // Oplog is empty. 
There are no oplog entries to apply, so we exit recovery. If there was a // checkpointTimestamp then we already set the initial data timestamp. Otherwise, there is // nothing to set it to. if (!topOfOplog) { log() << "No oplog entries to apply for recovery. Oplog is empty."; return; } if (auto startPoint = _getOplogApplicationStartPoint(checkpointTimestamp, appliedThrough)) { _applyToEndOfOplog(opCtx, startPoint.get(), topOfOplog->getTimestamp()); } // If we don't have a checkpoint timestamp, then we are either not running a storage engine // that supports "recover to stable timestamp" or we just upgraded from a version that didn't. // In both cases, the data on disk is not consistent until we have applied all oplog entries to // the end of the oplog, since we do not know which ones actually got applied before shutdown. // As a result, we do not set the initial data timestamp until after we have applied to the end // of the oplog. if (checkpointTimestamp.isNull()) { _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(), SnapshotName(topOfOplog->getTimestamp())); } } catch (...) { severe() << "Caught exception during replication recovery: " << exceptionToStatus(); std::terminate(); }
bool Backup::preexisting_volume_checks() { LOG_INFO(__FUNCTION__); VERIFY(source_snapshot_persistor); VERIFY(target_volume_); api::getManagementMutex().assertLocked(); if(start_snapshot_) { const SnapshotName& start_snapshot_name = *start_snapshot_; const SnapshotNum start_snapshot_num = source_snapshot_persistor->getSnapshotNum(*start_snapshot_); const UUID start_snapshot_uuid = source_snapshot_persistor->getUUID(start_snapshot_num); LOG_INFO("Backup target volume existed, figuring out if we need to do work"); // We are going to apply an incremental with snapshot matching //const SnapshotNum start_snapshot_num = source_snapshot_persistor.getSnapshotNum(*start_snapshot); if(not api::checkSnapshotUUID(target_volume_.get(), start_snapshot_name, start_snapshot_uuid)) { LOG_FATAL("Snapshot with name " << start_snapshot_name << " has non matching guids, exiting"); throw BackupException("Non matching guids"); } else if(api::snapshotExists(target_volume_.get(), end_snapshot_name)) { const UUID end_snapshot_uuid = source_snapshot_persistor->getUUID(end_snapshot_number); if(api::checkSnapshotUUID(target_volume_.get(), end_snapshot_name, end_snapshot_uuid)) { LOG_INFO("Start and end are already on target and guids match, lovely!"); LOG_INFO("Exiting early"); { status_.finish(); } return false; } else { LOG_INFO("Start snapshot was on target but end snapshot had different guid"); boost::this_thread::interruption_point(); api::restoreSnapshot(target_volume_.get(), start_snapshot_name); } } else { LOG_INFO("Start snapshot was on target but end snapshot not"); boost::this_thread::interruption_point(); api::restoreSnapshot(target_volume_.get(), start_snapshot_name); } } else { LOG_INFO("Backup volume existed but no start snapshot was given... 
trying to find the best place to backup from"); LOG_INFO("Getting the list of snapshots from the target"); std::list<SnapshotName> snapshots_list; api::showSnapshots(target_volume_.get(), snapshots_list); if(snapshots_list.empty()) { LOG_WARN("No snapshots in backed up volume, something went wrong??"); LOG_WARN("Checking for a failed first backup because the snapshots list is empty"); get_target_volume_info(); if(source_volume_config->getNS().str() != target_volume_config->id_) { LOG_FATAL("No snapshot on the volume and volume names don't match: " << source_volume_config->getNS() << " vs. " << std::string(target_volume_config->id_)); throw BackupException("Volume on backup had no snapshots and wrong volume name"); } if(target_volume_config->wan_backup_volume_role_ != VolumeConfig::WanBackupVolumeRole::WanBackupBase) { LOG_FATAL("Target volume exists, has not snapshot and names match but has the wrong role"); throw BackupException("Volume on backup had no snapshots and wrong role"); } LOG_WARN("Seems to have been a botched backup, cleaning up"); BackendInterfacePtr bip = VolManager::get()->createBackendInterface(target_namespace); std::list<std::string> objects; bip->listObjects(objects); BackendNamesFilter is_vd_object; for (const auto& o : objects) { if (is_vd_object(o)) { LOG_INFO("Removing " << o); bip->remove(o); } } throw RetryCreateVolume(); } SnapshotName snapshot_to_be_restored; LOG_INFO("Looping of target snapshotshots to find latest that can be matched"); for(std::list<SnapshotName>::const_reverse_iterator i = snapshots_list.rbegin(); i != snapshots_list.rend(); ++i) { const SnapshotName& snap_name = *i; if(source_snapshot_persistor->snapshotExists(snap_name)) { SnapshotNum num = source_snapshot_persistor->getSnapshotNum(snap_name); UUID snap_uuid = source_snapshot_persistor->getUUID(num); if(api::checkSnapshotUUID(target_volume_.get(), snap_name, snap_uuid)) { snapshot_to_be_restored = snap_name; break; } else { LOG_FATAL("Snapshot with the same name but 
different guid found, " << snap_name); throw BackupException("Guid confusion"); } } } if(snapshot_to_be_restored.empty()) { LOG_FATAL("Could not find a matching snapshot "); throw BackupException("Could not find a matching snapshot"); } else if(snapshot_to_be_restored == end_snapshot_name) { LOG_INFO("snapshot to be restored == end snapshot, exiting early"); { status_.finish(); } return false; } // Y42 we might be a lot smarter here LOG_INFO("Connecting snapshot found as " << snapshot_to_be_restored); LOG_INFO("Doing a restore to that snapshot on the target volume"); boost::this_thread::interruption_point(); api::restoreSnapshot(target_volume_.get(), snapshot_to_be_restored); start_snapshot_ = snapshot_to_be_restored; } status_.start_snapshot(start_snapshot_.get_value_or(SnapshotName())); return true; }
// Creates four snapshots, each taken right after LBA 0 was overwritten with
// its own pattern, then restores them from newest to oldest and checks that
// every restore brings back exactly the pattern belonging to that snapshot.
//
// Every snapshot uses a *distinct* pattern. Previously "snap1" and "snap2"
// were both written with "Frederik", so a restore that landed on the wrong
// one of the two could not be detected.
//
// After each restore the volume is overwritten and snapshotted again (as
// "snapper") before the next, older, snapshot is restored.
TEST_P(SnapshotRestoreTest, SimpleRestore)
{
    auto ns_ptr = make_random_namespace();
    SharedVolumePtr v = newVolume(VolumeId("volume1"),
                                  ns_ptr->ns());

    const std::string pattern1("Frederik");
    const std::string pattern2("Immanuel");
    const std::string pattern3("Arne");
    const std::string pattern4("Bart");
    const std::string pattern5("Wouter");

    // Overwrites LBA 0 with 'pattern', waits for the backend and snapshots.
    auto write_and_snapshot = [&](const std::string& pattern, const char* snap)
    {
        writeToVolume(*v, 0, 4096, pattern);
        waitForThisBackendWrite(*v);
        v->createSnapshot(SnapshotName(snap));
    };

    // Restores 'snap', verifies LBA 0 holds 'expected', then dirties the
    // volume and snapshots it again so the next restore has work to undo.
    auto restore_check_and_overwrite = [&](const char* snap,
                                           const std::string& expected)
    {
        SCOPED_TRACE(snap); // identifies the failing step in gtest output
        EXPECT_NO_THROW(restoreSnapshot(*v, snap));
        checkVolume(*v,0,4096,expected);

        writeToVolume(*v, 0, 4096, "Bollocks");
        waitForThisBackendWrite(*v);
        v->createSnapshot(SnapshotName("snapper"));
        waitForThisBackendWrite(*v);
    };

    write_and_snapshot(pattern1, "snap1");
    write_and_snapshot(pattern2, "snap2");
    write_and_snapshot(pattern3, "snap3");
    write_and_snapshot(pattern4, "snap4");

    // Data written after the last snapshot; the first restore discards it.
    writeToVolume(*v, 0, 4096, pattern5);
    checkVolume(*v,0,4096,pattern5);
    waitForThisBackendWrite(*v);

    restore_check_and_overwrite("snap4", pattern4);
    restore_check_and_overwrite("snap3", pattern3);
    restore_check_and_overwrite("snap2", pattern2);
    restore_check_and_overwrite("snap1", pattern1);

    checkCurrentBackendSize(*v);
}