// Scenario: with a failover cache configured, snapshot an untouched volume,
// write to it, restore the snapshot, write somewhere else, then destroy the
// volume (DeleteLocalData::T, RemoveVolumeCompletely::F) and restart it from
// the saved config. After the restart the first location must read back as
// the "\0" pattern and the second one as "d".
TEST_P(SnapshotRestoreTest, TestFailOver)
{
    // Declared first so it is destroyed last, after the namespace and volume.
    auto foc_ctx(start_one_foc());
    auto ns_ptr = make_random_namespace();

    const VolumeId volume_id("volume1");
    const SnapshotName base_snapshot("snap0");

    SharedVolumePtr v = newVolume(volume_id,
                                  ns_ptr->ns(),
                                  VolumeSize((1 << 18) * 512),
                                  SCOMultiplier(1));

    v->setFailOverCacheConfig(foc_ctx->config(GetParam().foc_mode()));

    // Captured now because the volume object is gone by the time we restart.
    VolumeConfig cfg = v->get_config();
    v->createSnapshot(base_snapshot);

    // Data written after the snapshot; the restore below has to discard it.
    for (int remaining = 5; remaining > 0; --remaining)
    {
        writeToVolume(*v, 0, 4096, "a");
    }

    waitForThisBackendWrite(*v);
    v->restoreSnapshot(base_snapshot);

    // Data written after the restore; it has to survive the restart.
    for (int remaining = 7; remaining > 0; --remaining)
    {
        writeToVolume(*v, 8, 4096, "d");
    }

    flushFailOverCache(*v);
    destroyVolume(v,
                  DeleteLocalData::T,
                  RemoveVolumeCompletely::F);

    // The volume must be unknown until it is explicitly restarted.
    SharedVolumePtr v1 = nullptr;
    v1 = getVolume(volume_id);
    ASSERT_FALSE(v1);

    restartVolume(cfg);
    v1 = getVolume(volume_id);
    ASSERT_TRUE(v1 != nullptr);

    // NOTE(review): "\0" converts to an empty std::string if checkVolume takes
    // a std::string pattern - confirm that this still means "all zeroes".
    checkVolume(*v1, 0, 4096, "\0");
    checkVolume(*v1, 8, 4096, "d");
    checkCurrentBackendSize(*v1);
}
// Scenario: create "snap1" on an untouched volume, write and create "snap2",
// restore back to "snap1", then write twice as much data as before and verify
// the backend size bookkeeping once that write has reached the backend.
TEST_P(SnapshotRestoreTest, RestoreAndWriteAgain2)
{
    auto ns_ptr = make_random_namespace();
    SharedVolumePtr v = newVolume(VolumeId("volume1"),
                                  ns_ptr->ns(),
                                  VolumeSize(1 << 26),
                                  SCOMultiplier(1));

    const std::string pattern("e-manual");
    const int chunk = 4096;

    // First snapshot: nothing has been written yet.
    v->createSnapshot(SnapshotName("snap1"));
    waitForThisBackendWrite(*v);

    // Second snapshot: five chunks of data.
    writeToVolume(*v, 0, 5 * chunk, pattern);
    v->createSnapshot(SnapshotName("snap2"));
    waitForThisBackendWrite(*v);

    // Go back to the first snapshot ...
    restoreSnapshot(*v, "snap1");

    // ... and write again, this time ten chunks.
    writeToVolume(*v, 0, 10 * chunk, pattern);
    waitForThisBackendWrite(*v);

    checkCurrentBackendSize(*v);
}
// Scenario: a restore to a snapshot name that was never created must throw
// without halting the volume, whereas a restore that fails after the backend
// object named after the initial TLog id was removed must throw and leave the
// volume halted.
TEST_P(SnapshotRestoreTest, HaltOnError)
{
    auto ns_ptr = make_random_namespace();
    SharedVolumePtr v = newVolume(VolumeId("volume1"),
                                  ns_ptr->ns());

    const std::string payload("blah");

    // Remember the TLog that is current before any data is written.
    const TLogId tlog_id(v->getSnapshotManagement().getCurrentTLogId());

    writeToVolume(*v, 0, 4096, payload);
    v->createSnapshot(SnapshotName("snap1"));
    waitForThisBackendWrite(*v);

    // "snap42" was never created: the restore fails, the volume stays usable.
    EXPECT_THROW(restoreSnapshot(*v, "snap42"),
                 std::exception);
    EXPECT_FALSE(v->is_halted());

    // Remove the backend object for that TLog, then restore the real snapshot.
    const std::string tlog_object_name(boost::lexical_cast<std::string>(tlog_id));
    v->getBackendInterface()->remove(tlog_object_name);

    EXPECT_THROW(restoreSnapshot(*v, "snap1"),
                 std::exception);
    EXPECT_TRUE(v->is_halted());
}
/**
 * Tells the global storage engine's snapshot manager that the snapshot named
 * after the timestamp of 'newCommitPoint' is now the committed snapshot.
 */
void ReplicationCoordinatorExternalStateImpl::updateCommittedSnapshot(OpTime newCommitPoint) {
    auto storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
    auto snapshotManager = storageEngine->getSnapshotManager();

    // This should never be called if there is no SnapshotManager.
    invariant(snapshotManager);

    const SnapshotName committedSnapshot(newCommitPoint.getTimestamp());
    snapshotManager->setCommittedSnapshot(committedSnapshot);
}
Example #5
0
/**
 * Startup recovery driven by the oplog:
 *   1. bail out if the initial sync flag is set;
 *   2. truncate the oplog at the recorded truncate-after point (if any) and clear that point;
 *   3. set the initial data timestamp from the checkpoint timestamp when there is one;
 *   4. apply oplog entries from the computed start point up to the top of the oplog;
 *   5. without a checkpoint timestamp, set the initial data timestamp to the top of the oplog.
 *
 * Any exception escaping the body is logged as severe and the process is terminated.
 */
void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try {
    if (_consistencyMarkers->getInitialSyncFlag(opCtx)) {
        log() << "No recovery needed. Initial sync flag set.";
        return;  // Initial Sync will take over so no cleanup is needed.
    }

    // Read both markers up front, before the oplog is touched.
    const auto oplogTruncatePoint = _consistencyMarkers->getOplogTruncateAfterPoint(opCtx);
    const auto lastAppliedThrough = _consistencyMarkers->getAppliedThrough(opCtx);

    if (!oplogTruncatePoint.isNull()) {
        log() << "Removing unapplied entries starting at: " << oplogTruncatePoint.toBSON();
        _truncateOplogTo(opCtx, oplogTruncatePoint);
    }

    // Reset the truncate-after point; otherwise the next batch of oplog entries could be
    // truncated erroneously.
    _consistencyMarkers->setOplogTruncateAfterPoint(opCtx, {});

    // TODO (SERVER-30556): Delete this line since the old oplog delete from point cannot exist.
    _consistencyMarkers->removeOldOplogDeleteFromPointField(opCtx);

    // An empty or missing oplog collection is tolerated and leaves 'newestOplogEntry' unset;
    // every other failure to read the last applied optime is fatal.
    auto lastAppliedSW = _getLastAppliedOpTime(opCtx);
    boost::optional<OpTime> newestOplogEntry;
    const auto& lastAppliedStatus = lastAppliedSW.getStatus();
    if (lastAppliedStatus != ErrorCodes::CollectionIsEmpty &&
        lastAppliedStatus != ErrorCodes::NamespaceNotFound) {
        fassertStatusOK(40290, lastAppliedSW);
        newestOplogEntry = lastAppliedSW.getValue();
    }

    // With a checkpoint timestamp we recovered to that timestamp, so it becomes the initial data
    // timestamp. It is set before applying anything so that the operations applied below can be
    // given the proper timestamps. Without one, the initial data timestamp is only set at the
    // very end (see below), and not at all if the oplog is empty.
    auto checkpointTimestamp = _consistencyMarkers->getCheckpointTimestamp(opCtx);
    if (!checkpointTimestamp.isNull()) {
        _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(),
                                                   SnapshotName(checkpointTimestamp));
    }

    // Nothing in the oplog means nothing to apply. A checkpoint timestamp, if present, has
    // already been used above; otherwise there is no value to set the initial data timestamp to.
    if (!newestOplogEntry) {
        log() << "No oplog entries to apply for recovery. Oplog is empty.";
        return;
    }

    auto applicationStart =
        _getOplogApplicationStartPoint(checkpointTimestamp, lastAppliedThrough);
    if (applicationStart) {
        _applyToEndOfOplog(opCtx, *applicationStart, newestOplogEntry->getTimestamp());
    }

    // No checkpoint timestamp: either the storage engine does not support "recover to stable
    // timestamp" or we just upgraded from a version that didn't. Either way the on-disk data is
    // only consistent once everything up to the end of the oplog has been applied, because we do
    // not know which entries were applied before shutdown. Hence the initial data timestamp is
    // set here, after application.
    if (checkpointTimestamp.isNull()) {
        _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(),
                                                   SnapshotName(newestOplogEntry->getTimestamp()));
    }

} catch (...) {
    severe() << "Caught exception during replication recovery: " << exceptionToStatus();
    std::terminate();
}
Example #6
0
bool
Backup::preexisting_volume_checks()
{
    LOG_INFO(__FUNCTION__);
    VERIFY(source_snapshot_persistor);
    VERIFY(target_volume_);

    api::getManagementMutex().assertLocked();

    if(start_snapshot_)
    {
        const SnapshotName& start_snapshot_name = *start_snapshot_;
        const SnapshotNum start_snapshot_num =
            source_snapshot_persistor->getSnapshotNum(*start_snapshot_);
        const UUID start_snapshot_uuid = source_snapshot_persistor->getUUID(start_snapshot_num);

        LOG_INFO("Backup target volume existed, figuring out if we need to do work");


        // We are going to apply an incremental with snapshot matching
        //const SnapshotNum start_snapshot_num = source_snapshot_persistor.getSnapshotNum(*start_snapshot);
        if(not api::checkSnapshotUUID(target_volume_.get(),
                                      start_snapshot_name,
                                      start_snapshot_uuid))
        {
            LOG_FATAL("Snapshot with name " << start_snapshot_name << " has non matching guids, exiting");
            throw BackupException("Non matching guids");
        }
        else if(api::snapshotExists(target_volume_.get(),
                                    end_snapshot_name))
        {
            const UUID end_snapshot_uuid = source_snapshot_persistor->getUUID(end_snapshot_number);
            if(api::checkSnapshotUUID(target_volume_.get(),
                                      end_snapshot_name,
                                      end_snapshot_uuid))
            {
                LOG_INFO("Start and end are already on target and guids match, lovely!");
                LOG_INFO("Exiting early");
                {
                    status_.finish();
                }

                return false;
            }
            else
            {
                LOG_INFO("Start snapshot was on target but end snapshot had different guid");
                boost::this_thread::interruption_point();

                api::restoreSnapshot(target_volume_.get(),
                                     start_snapshot_name);
            }
        }
        else
        {
            LOG_INFO("Start snapshot was on target but end snapshot not");
            boost::this_thread::interruption_point();

            api::restoreSnapshot(target_volume_.get(),
                                 start_snapshot_name);
        }
    }
    else
    {
        LOG_INFO("Backup volume existed but no start snapshot was given... trying to find the best place to backup from");
        LOG_INFO("Getting the list of snapshots from the target");

        std::list<SnapshotName> snapshots_list;
        api::showSnapshots(target_volume_.get(),
                           snapshots_list);

        if(snapshots_list.empty())
        {
            LOG_WARN("No snapshots in backed up volume, something went wrong??");
            LOG_WARN("Checking for a failed first backup because the snapshots list is empty");
            get_target_volume_info();
            if(source_volume_config->getNS().str() != target_volume_config->id_)
            {
                LOG_FATAL("No snapshot on the volume and volume names don't match: "
                          << source_volume_config->getNS() << " vs. " << std::string(target_volume_config->id_));

                throw BackupException("Volume on backup had no snapshots and wrong volume name");
            }
            if(target_volume_config->wan_backup_volume_role_ != VolumeConfig::WanBackupVolumeRole::WanBackupBase)
            {
                LOG_FATAL("Target volume exists, has not snapshot and names match but has the wrong role");
                throw BackupException("Volume on backup had no snapshots and wrong role");
            }
            LOG_WARN("Seems to have been a botched backup, cleaning up");
            BackendInterfacePtr bip = VolManager::get()->createBackendInterface(target_namespace);
            std::list<std::string> objects;
            bip->listObjects(objects);

            BackendNamesFilter is_vd_object;
            for (const auto& o : objects)
            {
                if (is_vd_object(o))
                {
                    LOG_INFO("Removing " << o);
                    bip->remove(o);
                }
            }

            throw RetryCreateVolume();
        }

        SnapshotName snapshot_to_be_restored;

        LOG_INFO("Looping of target snapshotshots to find latest that can be matched");

        for(std::list<SnapshotName>::const_reverse_iterator i = snapshots_list.rbegin();
            i != snapshots_list.rend();
            ++i)
        {
            const SnapshotName& snap_name = *i;
            if(source_snapshot_persistor->snapshotExists(snap_name))
            {
                SnapshotNum num = source_snapshot_persistor->getSnapshotNum(snap_name);
                UUID snap_uuid = source_snapshot_persistor->getUUID(num);
                if(api::checkSnapshotUUID(target_volume_.get(),
                                          snap_name,
                                          snap_uuid))
                {
                    snapshot_to_be_restored = snap_name;
                    break;
                }
                else
                {
                    LOG_FATAL("Snapshot with the same name but different guid found, " <<
                              snap_name);
                    throw BackupException("Guid confusion");
                }

            }
        }
        if(snapshot_to_be_restored.empty())
        {
            LOG_FATAL("Could not find a matching snapshot ");
            throw BackupException("Could not find a matching snapshot");
        }
        else if(snapshot_to_be_restored == end_snapshot_name)
        {
            LOG_INFO("snapshot to be restored == end snapshot, exiting early");
            {
                status_.finish();
            }

            return false;
        }

        // Y42 we might be a lot smarter here
        LOG_INFO("Connecting snapshot found as " << snapshot_to_be_restored);

        LOG_INFO("Doing a restore to that snapshot on the target volume");
        boost::this_thread::interruption_point();
        api::restoreSnapshot(target_volume_.get(),
                             snapshot_to_be_restored);
        start_snapshot_ = snapshot_to_be_restored;

    }
    status_.start_snapshot(start_snapshot_.get_value_or(SnapshotName()));

    return true;
}
// Scenario: overwrite the same location with a different pattern before each
// of the snapshots "snap1".."snap4", then restore them newest to oldest and
// check that every restore brings back exactly the pattern that was current
// when that snapshot was taken. After each restore new data is written and a
// "snapper" snapshot is created, so that the following restore also has to
// discard state that was created after an earlier restore.
TEST_P(SnapshotRestoreTest, SimpleRestore)
{
    auto ns_ptr = make_random_namespace();
    SharedVolumePtr v = newVolume(VolumeId("volume1"),
                                  ns_ptr->ns());

    // Every pattern has to be unique: with two snapshots holding the same
    // data, a restore that lands on the wrong one of them would go unnoticed.
    const std::string pattern1("Frederik");
    const std::string pattern2("Immanuel");
    const std::string pattern3("Arne");
    const std::string pattern4("Bart");
    const std::string pattern5("Wouter");

    // Pairs a snapshot name with the pattern that is on the volume when the
    // snapshot is taken.
    struct SnapshotStep
    {
        const char* snapshot;
        const std::string* pattern;
    };

    const SnapshotStep creation_order[] = {
        { "snap1", &pattern1 },
        { "snap2", &pattern2 },
        { "snap3", &pattern3 },
        { "snap4", &pattern4 },
    };

    for (const SnapshotStep& step : creation_order)
    {
        SCOPED_TRACE(step.snapshot);
        writeToVolume(*v, 0, 4096, *step.pattern);
        waitForThisBackendWrite(*v);
        v->createSnapshot(SnapshotName(step.snapshot));
    }

    // Data that is in no snapshot at all; the first restore must drop it.
    writeToVolume(*v, 0, 4096, pattern5);
    checkVolume(*v, 0, 4096, pattern5);
    waitForThisBackendWrite(*v);

    const SnapshotStep restore_order[] = {
        { "snap4", &pattern4 },
        { "snap3", &pattern3 },
        { "snap2", &pattern2 },
        { "snap1", &pattern1 },
    };

    for (const SnapshotStep& step : restore_order)
    {
        // Identifies the failing round, since all rounds share these lines.
        SCOPED_TRACE(step.snapshot);

        EXPECT_NO_THROW(restoreSnapshot(*v,
                                        step.snapshot));
        checkVolume(*v, 0, 4096, *step.pattern);

        // Diverge from the restored state again before the next round.
        writeToVolume(*v, 0, 4096, "Bollocks");
        waitForThisBackendWrite(*v);
        v->createSnapshot(SnapshotName("snapper"));
        waitForThisBackendWrite(*v);
    }

    checkCurrentBackendSize(*v);
}