Status preUpgradeCheck(const ConnectionString& configServer,
                           const VersionType& lastVersionInfo,
                           string minMongosVersion) {
        if (lastVersionInfo.isUpgradeIdSet() && lastVersionInfo.getUpgradeId().isSet()) {
            //
            // Another upgrade failed, so cleanup may be necessary
            //

            BSONObj lastUpgradeState = lastVersionInfo.getUpgradeState();

            bool inCriticalSection;
            string errMsg;
            if (!FieldParser::extract(lastUpgradeState,
                                      inCriticalSectionField,
                                      &inCriticalSection,
                                      &errMsg)) {
                return Status(ErrorCodes::FailedToParse, causedBy(errMsg));
            }

            if (inCriticalSection) {
                // Note: custom message must be supplied by caller
                return Status(ErrorCodes::ManualInterventionRequired, "");
            }
        }

        //
        // Check the versions of other mongo processes in the cluster before upgrade.
        // We can't upgrade if there are active pre-v2.4 processes in the cluster
        //
        return checkClusterMongoVersions(configServer, string(minMongosVersion));
    }
    /**
     * Upgrades v4 to v5.
     */
    bool doUpgradeV4ToV5(const ConnectionString& configLoc,
                         const VersionType& lastVersionInfo,
                         string* errMsg)
    {
        string dummy;
        if (!errMsg) errMsg = &dummy;

        verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_MandatoryEpochVersion);
        Status result = preUpgradeCheck(configLoc, lastVersionInfo, minMongoProcessVersion);

        if (!result.isOK()) {
            if (result.code() == ErrorCodes::ManualInterventionRequired) {
                *errMsg = cannotCleanupMessage;
            }
            else {
                *errMsg = result.toString();
            }

            return false;
        }

        // This is not needed because we are not actually going to make any modifications
        // on the other collections in the config server for this particular upgrade.
        // startConfigUpgrade(configLoc.toString(),
        //                    lastVersionInfo.getCurrentVersion(),
        //                    OID::gen());

        // If we actually need to modify something in the config servers these need to follow
        // after calling startConfigUpgrade(...):
        //
        // 1. Acquire necessary locks.
        // 2. Make a backup of the collections we are about to modify.
        // 3. Perform the upgrade process on the backup collection.
        // 4. Verify that no changes were made to the collections since the backup was performed.
        // 5. Call enterConfigUpgradeCriticalSection(configLoc.toString(),
        //    lastVersionInfo.getCurrentVersion()).
        // 6. Rename the backup collection to the name of the original collection with
        //    dropTarget set to true.

        // We're only after the version bump in commitConfigUpgrade here since we never
        // get into the critical section.
        Status commitStatus = commitConfigUpgrade(configLoc.toString(),
                                                  lastVersionInfo.getCurrentVersion(),
                                                  MIN_COMPATIBLE_CONFIG_VERSION,
                                                  CURRENT_CONFIG_VERSION);

        if (!commitStatus.isOK()) {
            *errMsg = commitStatus.toString();
            return false;
        }

        return true;
    }
    TEST_F(ConfigUpgradeTests, EmptyVersion) {

        //
        // Tests detection of empty config version
        //

        // Zero version (no version doc)
        VersionType oldVersion;
        Status status = getConfigVersion(grid.catalogManager(), &oldVersion);
        ASSERT(status.isOK());

        ASSERT_EQUALS(oldVersion.getMinCompatibleVersion(), 0);
        ASSERT_EQUALS(oldVersion.getCurrentVersion(), 0);
    }
Status CatalogManagerReplicaSet::checkAndUpgrade(bool checkOnly) {
    auto versionStatus = _getConfigVersion();
    if (!versionStatus.isOK()) {
        return versionStatus.getStatus();
    }

    auto versionInfo = versionStatus.getValue();
    if (versionInfo.getMinCompatibleVersion() > CURRENT_CONFIG_VERSION) {
        return {ErrorCodes::IncompatibleShardingConfigVersion,
                str::stream() << "current version v" << CURRENT_CONFIG_VERSION
                              << " is older than the cluster min compatible v"
                              << versionInfo.getMinCompatibleVersion()};
    }

    if (versionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion) {
        VersionType newVersion;
        newVersion.setClusterId(OID::gen());

        // For v3.2, only v3.2 binaries can talk to RS Config servers.
        newVersion.setMinCompatibleVersion(CURRENT_CONFIG_VERSION);
        newVersion.setCurrentVersion(CURRENT_CONFIG_VERSION);

        BSONObj versionObj(newVersion.toBSON());

        return update(VersionType::ConfigNS,
                      versionObj,
                      versionObj,
                      true /* upsert*/,
                      false /* multi */,
                      nullptr);
    }

    if (versionInfo.getCurrentVersion() == UpgradeHistory_UnreportedVersion) {
        return {ErrorCodes::IncompatibleShardingConfigVersion,
                "Assuming config data is old since the version document cannot be found in the"
                "config server and it contains databases aside 'local' and 'admin'. "
                "Please upgrade if this is the case. Otherwise, make sure that the config "
                "server is clean."};
    }

    if (versionInfo.getCurrentVersion() < CURRENT_CONFIG_VERSION) {
        return {ErrorCodes::IncompatibleShardingConfigVersion,
                str::stream() << "need to upgrade current cluster version to v"
                              << CURRENT_CONFIG_VERSION << "; currently at v"
                              << versionInfo.getCurrentVersion()};
    }

    return Status::OK();
}
Status ShardingCatalogManager::_initConfigVersion(OperationContext* opCtx) {
    const auto catalogClient = Grid::get(opCtx)->catalogClient();

    auto versionStatus =
        catalogClient->getConfigVersion(opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!versionStatus.isOK()) {
        return versionStatus.getStatus();
    }

    const auto& versionInfo = versionStatus.getValue();
    if (versionInfo.getMinCompatibleVersion() > CURRENT_CONFIG_VERSION) {
        return {ErrorCodes::IncompatibleShardingConfigVersion,
                str::stream() << "current version v" << CURRENT_CONFIG_VERSION
                              << " is older than the cluster min compatible v"
                              << versionInfo.getMinCompatibleVersion()};
    }

    if (versionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion) {
        VersionType newVersion;
        newVersion.setClusterId(OID::gen());
        newVersion.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
        newVersion.setCurrentVersion(CURRENT_CONFIG_VERSION);

        BSONObj versionObj(newVersion.toBSON());
        auto insertStatus = catalogClient->insertConfigDocument(
            opCtx, VersionType::ConfigNS, versionObj, kNoWaitWriteConcern);

        return insertStatus;
    }

    if (versionInfo.getCurrentVersion() == UpgradeHistory_UnreportedVersion) {
        return {ErrorCodes::IncompatibleShardingConfigVersion,
                "Assuming config data is old since the version document cannot be found in the "
                "config server and it contains databases besides 'local' and 'admin'. "
                "Please upgrade if this is the case. Otherwise, make sure that the config "
                "server is clean."};
    }

    if (versionInfo.getCurrentVersion() < CURRENT_CONFIG_VERSION) {
        return {ErrorCodes::IncompatibleShardingConfigVersion,
                str::stream() << "need to upgrade current cluster version to v"
                              << CURRENT_CONFIG_VERSION
                              << "; currently at v"
                              << versionInfo.getCurrentVersion()};
    }

    return Status::OK();
}
        /**
         * Stores a newer { version, minVersion, currentVersion, clusterId } config server entry.
         *
         * @return clusterId
         */
        OID storeConfigVersion(int configVersion) {

            if (configVersion < CURRENT_CONFIG_VERSION) {
                storeLegacyConfigVersion(configVersion);
                return OID();
            }

            VersionType version;
            version.setMinCompatibleVersion(configVersion);
            version.setCurrentVersion(configVersion);

            OID clusterId = OID::gen();

            version.setClusterId(clusterId);

            storeConfigVersion(version);
            return clusterId;
        }
StatusWith<VersionType> ShardingCatalogClientImpl::getConfigVersion(
    OperationContext* opCtx, repl::ReadConcernLevel readConcern) {
    auto findStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
        opCtx,
        kConfigReadSelector,
        readConcern,
        VersionType::ConfigNS,
        BSONObj(),
        BSONObj(),
        boost::none /* no limit */);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    auto queryResults = findStatus.getValue().docs;

    if (queryResults.size() > 1) {
        return {ErrorCodes::TooManyMatchingDocuments,
                str::stream() << "should only have 1 document in " << VersionType::ConfigNS.ns()};
    }

    if (queryResults.empty()) {
        VersionType versionInfo;
        versionInfo.setMinCompatibleVersion(UpgradeHistory_EmptyVersion);
        versionInfo.setCurrentVersion(UpgradeHistory_EmptyVersion);
        versionInfo.setClusterId(OID{});
        return versionInfo;
    }

    BSONObj versionDoc = queryResults.front();
    auto versionTypeResult = VersionType::fromBSON(versionDoc);
    if (!versionTypeResult.isOK()) {
        return versionTypeResult.getStatus().withContext(
            str::stream() << "Unable to parse config.version document " << versionDoc);
    }

    auto validationStatus = versionTypeResult.getValue().validate();
    if (!validationStatus.isOK()) {
        return Status(validationStatus.withContext(
            str::stream() << "Unable to validate config.version document " << versionDoc));
    }

    return versionTypeResult.getValue();
}
    // Dispatches upgrades based on version to the upgrades registered in the upgrade registry
    bool _nextUpgrade(const ConnectionString& configLoc,
                      const ConfigUpgradeRegistry& registry,
                      const VersionType& lastVersionInfo,
                      VersionType* upgradedVersionInfo,
                      string* errMsg) {

        int fromVersion = lastVersionInfo.getCurrentVersion();

        ConfigUpgradeRegistry::const_iterator foundIt = registry.find(fromVersion);

        if (foundIt == registry.end()) {

            *errMsg = stream() << "newer version " << CURRENT_CONFIG_VERSION
                               << " of mongo config metadata is required, " << "current version is "
                               << fromVersion << ", "
                               << "don't know how to upgrade from this version";

            return false;
        }

        const Upgrade& upgrade = foundIt->second;
        int toVersion = upgrade.toVersionRange.currentVersion;

        log() << "starting next upgrade step from v" << fromVersion << " to v" << toVersion << endl;

        // Log begin to config.changelog
        grid.catalogManager()->logChange(NULL,
                                         "starting upgrade of config database",
                                         VersionType::ConfigNS,
                                         BSON("from" << fromVersion << "to" << toVersion));

        if (!upgrade.upgradeCallback(configLoc, lastVersionInfo, errMsg)) {

            *errMsg = stream() << "error upgrading config database from v" << fromVersion << " to v"
                               << toVersion << causedBy(errMsg);

            return false;
        }

        // Get the config version we've upgraded to and make sure it's sane
        Status verifyConfigStatus = getConfigVersion(configLoc, upgradedVersionInfo);

        if (!verifyConfigStatus.isOK()) {
            *errMsg = stream() << "failed to validate v" << fromVersion << " config version upgrade"
                               << causedBy(verifyConfigStatus);

            return false;
        }

        grid.catalogManager()->logChange(NULL,
                                         "finished upgrade of config database",
                                         VersionType::ConfigNS,
                                         BSON("from" << fromVersion << "to" << toVersion));
        return true;
    }
StatusWith<VersionType> CatalogManagerReplicaSet::_getConfigVersion(OperationContext* txn) {
    const auto configShard = grid.shardRegistry()->getShard(txn, "config");
    const auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    auto readHost = readHostStatus.getValue();
    auto findStatus = _exhaustiveFindOnConfig(readHost,
                                              NamespaceString(VersionType::ConfigNS),
                                              BSONObj(),
                                              BSONObj(),
                                              boost::none /* no limit */);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    auto queryResults = findStatus.getValue().value;

    if (queryResults.size() > 1) {
        return {ErrorCodes::RemoteValidationError,
                str::stream() << "should only have 1 document in " << VersionType::ConfigNS};
    }

    if (queryResults.empty()) {
        auto countStatus =
            _runCountCommandOnConfig(readHost, NamespaceString(ShardType::ConfigNS), BSONObj());

        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }

        const auto& shardCount = countStatus.getValue();
        if (shardCount > 0) {
            // Version document doesn't exist, but config.shards is not empty. Assuming that
            // the current config metadata is pre v2.4.
            VersionType versionInfo;
            versionInfo.setMinCompatibleVersion(UpgradeHistory_UnreportedVersion);
            versionInfo.setCurrentVersion(UpgradeHistory_UnreportedVersion);
            return versionInfo;
        }

        VersionType versionInfo;
        versionInfo.setMinCompatibleVersion(UpgradeHistory_EmptyVersion);
        versionInfo.setCurrentVersion(UpgradeHistory_EmptyVersion);
        return versionInfo;
    }

    BSONObj versionDoc = queryResults.front();
    auto versionTypeResult = VersionType::fromBSON(versionDoc);
    if (!versionTypeResult.isOK()) {
        return Status(ErrorCodes::UnsupportedFormat,
                      str::stream() << "invalid config version document: " << versionDoc
                                    << versionTypeResult.getStatus().toString());
    }

    return versionTypeResult.getValue();
}
Exemple #10
0
    // Checks version compatibility with our version
    VersionStatus isConfigVersionCompatible(const VersionType& versionInfo, string* whyNot) {

        string dummy;
        if (!whyNot) whyNot = &dummy;

        // Check if we're empty
        if (versionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion) {
            return VersionStatus_NeedUpgrade;
        }

        // Check that we aren't too old
        if (CURRENT_CONFIG_VERSION < versionInfo.getMinCompatibleVersion()) {

            *whyNot = stream() << "the config version " << CURRENT_CONFIG_VERSION
                               << " of our process is too old "
                               << "for the detected config version "
                               << versionInfo.getMinCompatibleVersion();

            return VersionStatus_Incompatible;
        }

        // Check that the mongo version of this process hasn't been excluded from the cluster
        vector<MongoVersionRange> excludedRanges;
        if (versionInfo.isExcludingMongoVersionsSet() &&
            !MongoVersionRange::parseBSONArray(versionInfo.getExcludingMongoVersions(),
                                               &excludedRanges,
                                               whyNot))
        {

            *whyNot = stream() << "could not understand excluded version ranges"
                               << causedBy(whyNot);

            return VersionStatus_Incompatible;
        }

        // versionString is the global version of this process
        if (isInMongoVersionRanges(versionString, excludedRanges)) {

            // Cast needed here for MSVC compiler issue
            *whyNot = stream() << "not compatible with current config version, version "
                               << reinterpret_cast<const char*>(versionString)
                               << "has been excluded.";

            return VersionStatus_Incompatible;
        }

        // Check if we need to upgrade
        if (versionInfo.getCurrentVersion() >= CURRENT_CONFIG_VERSION) {
            return VersionStatus_Compatible;
        }

        return VersionStatus_NeedUpgrade;
    }
    /**
     * Upgrade v0 to v7 described here
     *
     * This upgrade takes the config server from empty to an initial version.
     */
    bool doUpgradeV0ToV7(const ConnectionString& configLoc,
                         const VersionType& lastVersionInfo,
                         string* errMsg) {

        string dummy;
        if (!errMsg) errMsg = &dummy;

        verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion);

        //
        // Even though the initial config write is a single-document update, that single document
        // is on multiple config servers and requests can interleave.  The upgrade lock prevents
        // this.
        //

        log() << "writing initial config version at v" << CURRENT_CONFIG_VERSION;

        OID newClusterId = OID::gen();

        VersionType versionInfo;

        // Upgrade to new version
        versionInfo.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
        versionInfo.setCurrentVersion(CURRENT_CONFIG_VERSION);
        versionInfo.setClusterId(newClusterId);

        verify(versionInfo.isValid(NULL));

        // If the cluster has not previously been initialized, we need to set the version before
        // using so subsequent mongoses use the config data the same way.  This requires all three
        // config servers online initially.
        Status result = grid.catalogManager()->update(
                                VersionType::ConfigNS,
                                BSON("_id" << 1),
                                versionInfo.toBSON(),
                                true,   // upsert
                                false,  // multi
                                NULL);

        if (!result.isOK()) {
            *errMsg = stream() << "error writing initial config version: "
                               << result.reason();
            return false;
        }

        return true;
    }
    /**
     * Upgrade v0 to v5 described here
     *
     * This upgrade takes the config server from empty to an initial version.
     */
    bool doUpgradeV0ToV5(const ConnectionString& configLoc,
                         const VersionType& lastVersionInfo,
                         string* errMsg)
    {
        string dummy;
        if (!errMsg) errMsg = &dummy;

        verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion);

        //
        // Even though the initial config write is a single-document update, that single document
        // is on multiple config servers and requests can interleave.  The upgrade lock prevents
        // this.
        //

        log() << "writing initial config version at v" << CURRENT_CONFIG_VERSION << endl;

        OID newClusterId = OID::gen();

        VersionType versionInfo;

        // Upgrade to new version
        versionInfo.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
        versionInfo.setCurrentVersion(CURRENT_CONFIG_VERSION);
        versionInfo.setClusterId(newClusterId);

        verify(versionInfo.isValid(NULL));

        // If the cluster has not previously been initialized, we need to set the version before
        // using so subsequent mongoses use the config data the same way.  This requires all three
        // config servers online initially.
        try {
            ScopedDbConnection conn(configLoc, 30);
            conn->update(VersionType::ConfigNS, BSON("_id" << 1), versionInfo.toBSON(), true);
            _checkGLE(conn);
            conn.done();
        }
        catch (const DBException& e) {

            *errMsg = stream() << "error writing initial config version" << causedBy(e);

            return false;
        }

        return true;
    }
    TEST_F(ConfigUpgradeTests, InitialUpgrade) {

        //
        // Tests initializing the config server to the initial version
        //

        // Empty version
        VersionType versionOld;
        VersionType version;
        string errMsg;
        bool result = checkAndUpgradeConfigVersion(grid.catalogManager(),
                                                   false,
                                                   &versionOld,
                                                   &version,
                                                   &errMsg);

        ASSERT(result);
        ASSERT_EQUALS(versionOld.getCurrentVersion(), 0);
        ASSERT_EQUALS(version.getMinCompatibleVersion(), MIN_COMPATIBLE_CONFIG_VERSION);
        ASSERT_EQUALS(version.getCurrentVersion(), CURRENT_CONFIG_VERSION);
        ASSERT_NOT_EQUALS(version.getClusterId(), OID());
    }
Exemple #14
0
Status checkAndInitConfigVersion(OperationContext* txn,
                                 CatalogManager* catalogManager,
                                 DistLockManager* distLockManager) {
    VersionType versionInfo;
    Status status = getConfigVersion(catalogManager, &versionInfo);
    if (!status.isOK()) {
        return status;
    }

    string errMsg;
    VersionStatus comp = isConfigVersionCompatible(versionInfo, &errMsg);

    if (comp == VersionStatus_Incompatible)
        return {ErrorCodes::IncompatibleShardingMetadata, errMsg};
    if (comp == VersionStatus_Compatible)
        return Status::OK();

    invariant(comp == VersionStatus_NeedUpgrade);

    if (versionInfo.getCurrentVersion() != UpgradeHistory_EmptyVersion) {
        return {ErrorCodes::IncompatibleShardingMetadata,
                stream() << "newer version " << CURRENT_CONFIG_VERSION
                         << " of mongo config metadata is required, "
                         << "current version is " << versionInfo.getCurrentVersion()};
    }

    // Contact the config servers to make sure all are online - otherwise we wait a long time
    // for locks.
    status = _checkConfigServersAlive(grid.shardRegistry()->getConfigServerConnectionString());
    if (!status.isOK()) {
        return status;
    }

    //
    // Acquire a lock for the upgrade process.
    //
    // We want to ensure that only a single mongo process is upgrading the config server at a
    // time.
    //

    string whyMessage(stream() << "initializing config database to new format v"
                               << CURRENT_CONFIG_VERSION);
    auto lockTimeout = stdx::chrono::minutes(20);
    auto scopedDistLock = distLockManager->lock(txn, "configUpgrade", whyMessage, lockTimeout);
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    //
    // Double-check compatibility inside the upgrade lock
    // Another process may have won the lock earlier and done the upgrade for us, check
    // if this is the case.
    //

    status = getConfigVersion(catalogManager, &versionInfo);
    if (!status.isOK()) {
        return status;
    }

    comp = isConfigVersionCompatible(versionInfo, &errMsg);

    if (comp == VersionStatus_Incompatible) {
        return {ErrorCodes::IncompatibleShardingMetadata, errMsg};
    }
    if (comp == VersionStatus_Compatible)
        return Status::OK();

    invariant(comp == VersionStatus_NeedUpgrade);

    //
    // Run through the upgrade steps necessary to bring our config version to the current
    // version
    //

    log() << "initializing config server version to " << CURRENT_CONFIG_VERSION;

    status = makeConfigVersionDocument(txn, catalogManager);
    if (!status.isOK())
        return status;

    log() << "initialization of config server to v" << CURRENT_CONFIG_VERSION << " successful";

    return Status::OK();
}
    /**
     * Upgrade v3 to v4 described here.
     *
     * This upgrade takes a config server without collection epochs (potentially) and adds
     * epochs to all mongo processes.
     *
     */
    bool doUpgradeV3ToV4(const ConnectionString& configLoc,
                         const VersionType& lastVersionInfo,
                         string* errMsg)
    {
        string dummy;
        if (!errMsg) errMsg = &dummy;

        verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_NoEpochVersion);

        if (lastVersionInfo.isUpgradeIdSet() && lastVersionInfo.getUpgradeId().isSet()) {

            //
            // Another upgrade failed, so cleanup may be necessary
            //

            BSONObj lastUpgradeState = lastVersionInfo.getUpgradeState();

            bool inCriticalSection;
            if (!FieldParser::extract(lastUpgradeState,
                                      inCriticalSectionField,
                                      &inCriticalSection,
                                      errMsg))
            {

                *errMsg = stream() << "problem reading previous upgrade state" << causedBy(errMsg);

                return false;
            }

            if (inCriticalSection) {

                // Manual intervention is needed here.  Somehow our upgrade didn't get applied
                // consistently across config servers.

                *errMsg = cannotCleanupMessage;

                return false;
            }

            if (!_cleanupUpgradeState(configLoc, lastVersionInfo.getUpgradeId(), errMsg)) {
                
                // If we can't cleanup the old upgrade state, the user might have done it for us,
                // not a fatal problem (we'll just end up with extra collections).
                
                warning() << "could not cleanup previous upgrade state" << causedBy(errMsg) << endl;
                *errMsg = "";
            }
        }

        //
        // Check the versions of other mongo processes in the cluster before upgrade.
        // We can't upgrade if there are active pre-v2.2 processes in the cluster
        //

        Status mongoVersionStatus = checkClusterMongoVersions(configLoc,
                                                              string(minMongoProcessVersion));

        if (!mongoVersionStatus.isOK()) {

            *errMsg = stream() << "cannot upgrade with pre-v" << minMongoProcessVersion
                               << " mongo processes active in the cluster"
                               << causedBy(mongoVersionStatus);

            return false;
        }

        VersionType newVersionInfo;
        lastVersionInfo.cloneTo(&newVersionInfo);

        // Set our upgrade id and state
        OID upgradeId = OID::gen();
        newVersionInfo.setUpgradeId(upgradeId);
        newVersionInfo.setUpgradeState(BSONObj());

        // Write our upgrade id and state
        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                *errMsg = stream() << "could not initialize version info for upgrade"
                                   << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // First lock all collection namespaces that exist
        //

        OwnedPointerMap<string, CollectionType> ownedCollections;
        const map<string, CollectionType*>& collections = ownedCollections.map();

        Status findCollectionsStatus = findAllCollectionsV3(configLoc, &ownedCollections);

        if (!findCollectionsStatus.isOK()) {

            *errMsg = stream() << "could not read collections from config server"
                               << causedBy(findCollectionsStatus);

            return false;
        }

        //
        // Acquire locks for all sharded collections
        // Something that didn't involve getting thousands of locks would be better.
        //

        OwnedPointerVector<ScopedDistributedLock> collectionLocks;

        log() << "acquiring locks for " << collections.size() << " sharded collections..." << endl;
        
        // WARNING - this string is used programmatically when forcing locks, be careful when
        // changing!
        // TODO: Add programmatic "why" field to lock collection
        string lockMessage = str::stream() << "ensuring epochs for config upgrade"
                                           << " (" << upgradeId.toString() << ")";
        
        if (!_acquireAllCollectionLocks(configLoc,
                                        collections,
                                        lockMessage,
                                        20 * 60 * 1000,
                                        &collectionLocks,
                                        errMsg))
        {

            *errMsg = stream() << "could not acquire all namespace locks for upgrade" 
                               << " (" << upgradeId.toString() << ")"
                               << causedBy(errMsg);

            return false;
        }

        // We are now preventing all splits and migrates for all sharded collections

        // Get working and backup suffixes
        string workingSuffix = genWorkingSuffix(upgradeId);
        string backupSuffix = genBackupSuffix(upgradeId);

        log() << "copying collection and chunk metadata to working and backup collections..."
              << endl;

        // Get a backup and working copy of the config.collections and config.chunks collections

        Status copyStatus = copyFrozenCollection(configLoc,
                                                 CollectionType::ConfigNS,
                                                 CollectionType::ConfigNS + workingSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                               << (CollectionType::ConfigNS + workingSuffix)
                               << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          CollectionType::ConfigNS,
                                          CollectionType::ConfigNS + backupSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                               << (CollectionType::ConfigNS + backupSuffix) << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          ChunkType::ConfigNS,
                                          ChunkType::ConfigNS + workingSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                               << (ChunkType::ConfigNS + workingSuffix) << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          ChunkType::ConfigNS,
                                          ChunkType::ConfigNS + backupSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                               << (ChunkType::ConfigNS + backupSuffix) << causedBy(copyStatus);

            return false;
        }

        //
        // Go through sharded collections one-by-one and add epochs where missing
        //

        for (map<string, CollectionType*>::const_iterator it = collections.begin();
                it != collections.end(); ++it)
        {
            // Create a copy so that we can change the epoch later
            CollectionType collection;
            it->second->cloneTo(&collection);

            log() << "checking epochs for " << collection.getNS() << " collection..." << endl;

            OID epoch = collection.getEpoch();

            //
            // Go through chunks to find epoch if we haven't found it or to verify epoch is the same
            //

            OwnedPointerVector<ChunkType> ownedChunks;
            const vector<ChunkType*>& chunks = ownedChunks.vector();

            Status findChunksStatus = findAllChunks(configLoc, collection.getNS(), &ownedChunks);

            if (!findChunksStatus.isOK()) {

                *errMsg = stream() << "could not read chunks from config server"
                                   << causedBy(findChunksStatus);

                return false;
            }

            for (vector<ChunkType*>::const_iterator chunkIt = chunks.begin();
                    chunkIt != chunks.end(); ++chunkIt)
            {
                const ChunkType& chunk = *(*chunkIt);

                // If our chunk epoch is set and doesn't match
                if (epoch.isSet() && chunk.getVersion().epoch().isSet()
                    && chunk.getVersion().epoch() != epoch)
                {

                    *errMsg = stream() << "chunk epoch for " << chunk.toString() << " in "
                                       << collection.getNS() << " does not match found epoch "
                                       << epoch;

                    return false;
                }
                else if (!epoch.isSet() && chunk.getVersion().epoch().isSet()) {
                    epoch = chunk.getVersion().epoch();
                }
            }

            //
            // Write collection epoch if needed
            //

            if (!collection.getEpoch().isSet()) {

                OID newEpoch = OID::gen();

                log() << "writing new epoch " << newEpoch << " for " << collection.getNS()
                      << " collection..." << endl;

                scoped_ptr<ScopedDbConnection> connPtr;

                try {
                    connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                    ScopedDbConnection& conn = *connPtr;

                    conn->update(CollectionType::ConfigNS + workingSuffix,
                                 BSON(CollectionType::ns(collection.getNS())),
                                 BSON("$set" << BSON(CollectionType::DEPRECATED_lastmodEpoch(newEpoch))));
                    _checkGLE(conn);
                }
                catch (const DBException& e) {

                    *errMsg = stream() << "could not write a new epoch for " << collection.getNS()
                                       << causedBy(e);

                    return false;
                }

                connPtr->done();
                collection.setEpoch(newEpoch);
            }

            epoch = collection.getEpoch();
            verify(epoch.isSet());

            //
            // Now write verified epoch to all chunks
            //

            log() << "writing epoch " << epoch << " for " << chunks.size() << " chunks in "
                  << collection.getNS() << " collection..." << endl;

            {
                scoped_ptr<ScopedDbConnection> connPtr;

                try {
                    connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                    ScopedDbConnection& conn = *connPtr;

                    // Multi-update of all chunks
                    conn->update(ChunkType::ConfigNS + workingSuffix,
                                 BSON(ChunkType::ns(collection.getNS())),
                                 BSON("$set" << BSON(ChunkType::DEPRECATED_epoch(epoch))),
                                 false,
                                 true); // multi
                    _checkGLE(conn);
                }
                catch (const DBException& e) {

                    *errMsg = stream() << "could not write a new epoch " << epoch.toString()
                                       << " for chunks in " << collection.getNS() << causedBy(e);

                    return false;
                }

                connPtr->done();
            }
        }

        //
        // Paranoid verify the collection writes
        //

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                // Find collections with no epochs
                BSONObj emptyDoc =
                        conn->findOne(CollectionType::ConfigNS + workingSuffix,
                                      BSON("$unset" << BSON(CollectionType::DEPRECATED_lastmodEpoch() << 1)));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "collection " << emptyDoc
                                       << " is still missing epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find collections with empty epochs
                emptyDoc = conn->findOne(CollectionType::ConfigNS + workingSuffix,
                                         BSON(CollectionType::DEPRECATED_lastmodEpoch(OID())));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "collection " << emptyDoc
                                       << " still has empty epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find chunks with no epochs
                emptyDoc =
                        conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                      BSON("$unset" << BSON(ChunkType::DEPRECATED_epoch() << 1)));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "chunk " << emptyDoc
                                       << " is still missing epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find chunks with empty epochs
                emptyDoc = conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                         BSON(ChunkType::DEPRECATED_epoch(OID())));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "chunk " << emptyDoc
                                       << " still has empty epoch after config upgrade";

                    connPtr->done();
                    return false;
                }
            }
            catch (const DBException& e) {

                *errMsg = stream() << "could not verify epoch writes" << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // Double check that our collections haven't changed
        //

        Status idCheckStatus = checkIdsTheSame(configLoc,
                                               CollectionType::ConfigNS,
                                               CollectionType::ConfigNS + workingSuffix);

        if (!idCheckStatus.isOK()) {

            *errMsg = stream() << CollectionType::ConfigNS
                               << " was modified while working on upgrade"
                               << causedBy(idCheckStatus);

            return false;
        }

        idCheckStatus = checkIdsTheSame(configLoc,
                                        ChunkType::ConfigNS,
                                        ChunkType::ConfigNS + workingSuffix);

        if (!idCheckStatus.isOK()) {

            *errMsg = stream() << ChunkType::ConfigNS << " was modified while working on upgrade"
                               << causedBy(idCheckStatus);

            return false;
        }

        //
        // ENTER CRITICAL SECTION
        //

        newVersionInfo.setUpgradeState(BSON(inCriticalSectionField(true)));

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                // No cleanup message here since we're not sure if we wrote or not, and
                // not dangerous either way except to prevent further updates (at which point
                // the message is printed)
                *errMsg = stream()
                        << "could not update version info to enter critical update section"
                        << causedBy(e);

                return false;
            }

            // AT THIS POINT ANY FAILURE REQUIRES MANUAL INTERVENTION!
            connPtr->done();
        }

        log() << "entered critical section for config upgrade" << endl;

        Status overwriteStatus = overwriteCollection(configLoc,
                                                     CollectionType::ConfigNS + workingSuffix,
                                                     CollectionType::ConfigNS);

        if (!overwriteStatus.isOK()) {

            error() << cleanupMessage << endl;
            *errMsg = stream() << "could not overwrite collection " << CollectionType::ConfigNS
                               << " with working collection "
                               << (CollectionType::ConfigNS + workingSuffix)
                               << causedBy(overwriteStatus);

            return false;
        }

        overwriteStatus = overwriteCollection(configLoc,
                                              ChunkType::ConfigNS + workingSuffix,
                                              ChunkType::ConfigNS);

        if (!overwriteStatus.isOK()) {

            error() << cleanupMessage << endl;
            *errMsg = stream() << "could not overwrite collection " << ChunkType::ConfigNS
                               << " with working collection "
                               << (ChunkType::ConfigNS + workingSuffix)
                               << causedBy(overwriteStatus);

            return false;
        }

        //
        // Finally update the version to latest and add clusterId to version
        //

        OID newClusterId = OID::gen();

        // Note: hardcoded versions, since this is a very particular upgrade
        // Note: DO NOT CLEAR the config version unless bumping the minCompatibleVersion,
        // we want to save the excludes that were set.

        newVersionInfo.setMinCompatibleVersion(UpgradeHistory_NoEpochVersion);
        newVersionInfo.setCurrentVersion(UpgradeHistory_MandatoryEpochVersion);
        newVersionInfo.setClusterId(newClusterId);

        // Leave critical section
        newVersionInfo.unsetUpgradeId();
        newVersionInfo.unsetUpgradeState();

        log() << "writing new version info and clusterId " << newClusterId << "..." << endl;

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(UpgradeHistory_NoEpochVersion)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                error() << cleanupMessage << endl;
                *errMsg = stream() << "could not write new version info "
                                   << "and exit critical upgrade section" << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // END CRITICAL SECTION
        //

        return true;
    }
 /**
  * Stores a newer { version, minVersion, currentVersion, clusterId } config server entry
  */
 void storeConfigVersion(const VersionType& versionInfo) {
     DBDirectClient client(&_txn);
     client.insert(VersionType::ConfigNS, versionInfo.toBSON());
 }
Exemple #17
0
 /**
  * Stores a newer { version, minVersion, currentVersion, clusterId } config server entry
  */
 void storeConfigVersion(const VersionType& versionInfo) {
     client().insert(VersionType::ConfigNS, versionInfo.toBSON());
 }
/**
 * Upgrades v5 to v6.
 */
bool doUpgradeV5ToV6(const ConnectionString& configLoc,
                     const VersionType& lastVersionInfo,
                     string* errMsg) {
    string dummy;
    if (!errMsg)
        errMsg = &dummy;

    verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_DummyBumpPre2_6);
    Status result = preUpgradeCheck(configLoc, lastVersionInfo, minMongoProcessVersion);

    if (!result.isOK()) {
        if (result.code() == ErrorCodes::ManualInterventionRequired) {
            *errMsg = cannotCleanupMessage;
        } else {
            *errMsg = result.toString();
        }

        return false;
    }

    // This is not needed because we are not actually going to make any modifications
    // on the other collections in the config server for this particular upgrade.
    // startConfigUpgrade(configLoc.toString(),
    //                    lastVersionInfo.getCurrentVersion(),
    //                    OID::gen());

    // If we actually need to modify something in the config servers these need to follow
    // after calling startConfigUpgrade(...):
    //
    // 1. Acquire necessary locks.
    // 2. Make a backup of the collections we are about to modify.
    // 3. Perform the upgrade process on the backup collection.
    // 4. Verify that no changes were made to the collections since the backup was performed.
    // 5. Call enterConfigUpgradeCriticalSection(configLoc.toString(),
    //    lastVersionInfo.getCurrentVersion()).
    // 6. Rename the backup collection to the name of the original collection with
    //    dropTarget set to true.

    // Make sure the { ts: 1 } index is not unique by dropping the existing one
    // and rebuilding the index with the right specification.

    const BSONObj lockIdxKey = BSON(LocksType::lockID() << 1);
    const NamespaceString indexNS(LocksType::ConfigNS);

    bool dropOk = false;
    try {
        ScopedDbConnection conn(configLoc);
        BSONObj dropResponse;
        dropOk = conn->runCommand(indexNS.db().toString(),
                                  BSON("dropIndexes" << indexNS.coll() << "index" << lockIdxKey),
                                  dropResponse);
        conn.done();
    } catch (const DBException& ex) {
        if (ex.getCode() == 13105) {
            // 13105 is the exception code from SyncClusterConnection::findOne that gets
            // thrown when one of the command responses has an "ok" field that is not true.
            dropOk = false;
        } else {
            *errMsg = str::stream() << "Failed to drop { ts: 1 } index" << causedBy(ex);
            return false;
        }
    }

    if (!dropOk && hasBadIndex(configLoc, errMsg)) {
        // Fail only if the index still exists.
        return false;
    }

    result = clusterCreateIndex(LocksType::ConfigNS,
                                BSON(LocksType::lockID() << 1),
                                false,  // unique
                                WriteConcernOptions::AllConfigs,
                                NULL);

    if (!result.isOK()) {
        *errMsg = str::stream() << "error while creating { ts: 1 } index on config db"
                                << causedBy(result);
        return false;
    }

    LOG(1) << "Checking to make sure that the right { ts: 1 } index is created...";

    if (hasBadIndex(configLoc, errMsg)) {
        return false;
    }

    // We're only after the version bump in commitConfigUpgrade here since we never
    // get into the critical section.
    Status commitStatus = commitConfigUpgrade(configLoc.toString(),
                                              lastVersionInfo.getCurrentVersion(),
                                              MIN_COMPATIBLE_CONFIG_VERSION,
                                              CURRENT_CONFIG_VERSION);

    if (!commitStatus.isOK()) {
        *errMsg = commitStatus.toString();
        return false;
    }

    return true;
}
Status CatalogManagerReplicaSet::initConfigVersion(OperationContext* txn) {
    for (int x = 0; x < kMaxConfigVersionInitRetry; x++) {
        auto versionStatus = _getConfigVersion(txn);
        if (!versionStatus.isOK()) {
            return versionStatus.getStatus();
        }

        auto versionInfo = versionStatus.getValue();
        if (versionInfo.getMinCompatibleVersion() > CURRENT_CONFIG_VERSION) {
            return {ErrorCodes::IncompatibleShardingConfigVersion,
                    str::stream() << "current version v" << CURRENT_CONFIG_VERSION
                                  << " is older than the cluster min compatible v"
                                  << versionInfo.getMinCompatibleVersion()};
        }

        if (versionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion) {
            VersionType newVersion;
            newVersion.setClusterId(OID::gen());
            newVersion.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
            newVersion.setCurrentVersion(CURRENT_CONFIG_VERSION);

            BSONObj versionObj(newVersion.toBSON());
            BatchedCommandResponse response;
            auto upsertStatus = update(txn,
                                       VersionType::ConfigNS,
                                       versionObj,
                                       versionObj,
                                       true /* upsert*/,
                                       false /* multi */,
                                       &response);

            if ((upsertStatus.isOK() && response.getN() < 1) ||
                upsertStatus == ErrorCodes::DuplicateKey) {
                // Do the check again as someone inserted a new config version document
                // and the upsert neither inserted nor updated a config version document.
                // Note: you can get duplicate key errors on upsert because of SERVER-14322.
                continue;
            }

            return upsertStatus;
        }

        if (versionInfo.getCurrentVersion() == UpgradeHistory_UnreportedVersion) {
            return {ErrorCodes::IncompatibleShardingConfigVersion,
                    "Assuming config data is old since the version document cannot be found in the "
                    "config server and it contains databases aside 'local' and 'admin'. "
                    "Please upgrade if this is the case. Otherwise, make sure that the config "
                    "server is clean."};
        }

        if (versionInfo.getCurrentVersion() < CURRENT_CONFIG_VERSION) {
            return {ErrorCodes::IncompatibleShardingConfigVersion,
                    str::stream() << "need to upgrade current cluster version to v"
                                  << CURRENT_CONFIG_VERSION << "; currently at v"
                                  << versionInfo.getCurrentVersion()};
        }

        return Status::OK();
    }

    return {ErrorCodes::IncompatibleShardingConfigVersion,
            str::stream() << "unable to create new config version document after "
                          << kMaxConfigVersionInitRetry << " retries"};
}
StatusWith<VersionType> CatalogManagerReplicaSet::_getConfigVersion() {
    const auto configShard = grid.shardRegistry()->getShard("config");
    const auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    auto readHost = readHostStatus.getValue();
    auto findStatus = grid.shardRegistry()->exhaustiveFind(readHost,
                                                           NamespaceString(VersionType::ConfigNS),
                                                           BSONObj(),
                                                           BSONObj(),
                                                           boost::none /* no limit */);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    auto queryResults = findStatus.getValue();

    if (queryResults.size() > 1) {
        return {ErrorCodes::RemoteValidationError,
                str::stream() << "should only have 1 document in " << VersionType::ConfigNS};
    }

    if (queryResults.empty()) {
        auto cmdStatus =
            grid.shardRegistry()->runCommand(readHost, "admin", BSON("listDatabases" << 1));
        if (!cmdStatus.isOK()) {
            return cmdStatus.getStatus();
        }

        const BSONObj& cmdResult = cmdStatus.getValue();

        Status cmdResultStatus = getStatusFromCommandResult(cmdResult);
        if (!cmdResultStatus.isOK()) {
            return cmdResultStatus;
        }

        for (const auto& dbEntry : cmdResult["databases"].Obj()) {
            const string& dbName = dbEntry["name"].String();

            if (dbName != "local" && dbName != "admin") {
                VersionType versionInfo;
                versionInfo.setMinCompatibleVersion(UpgradeHistory_UnreportedVersion);
                versionInfo.setCurrentVersion(UpgradeHistory_UnreportedVersion);
                return versionInfo;
            }
        }

        VersionType versionInfo;
        versionInfo.setMinCompatibleVersion(UpgradeHistory_EmptyVersion);
        versionInfo.setCurrentVersion(UpgradeHistory_EmptyVersion);
        return versionInfo;
    }

    BSONObj versionDoc = queryResults.front();
    auto versionTypeResult = VersionType::fromBSON(versionDoc);
    if (!versionTypeResult.isOK()) {
        return Status(ErrorCodes::UnsupportedFormat,
                      str::stream() << "invalid config version document: " << versionDoc
                                    << versionTypeResult.getStatus().toString());
    }

    return versionTypeResult.getValue();
}
    TEST_F(ConfigUpgradeTests, ClusterIDVersion) {

        //
        // Tests detection of newer config versions
        //

        VersionType newVersion;
        newVersion.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
        newVersion.setCurrentVersion(CURRENT_CONFIG_VERSION);
        storeConfigVersion(newVersion);

        newVersion.clear();

        // Current Version w/o clusterId (invalid!)
        Status status = getConfigVersion(grid.catalogManager(), &newVersion);
        ASSERT(!status.isOK());

        newVersion.clear();

        OID clusterId = OID::gen();
        newVersion.setClusterId(clusterId);
        newVersion.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
        newVersion.setCurrentVersion(CURRENT_CONFIG_VERSION);

        clearVersion();
        storeConfigVersion(newVersion);

        newVersion.clear();

        // Current version w/ clusterId (valid!)
        status = getConfigVersion(grid.catalogManager(), &newVersion);
        ASSERT(status.isOK());

        ASSERT_EQUALS(newVersion.getMinCompatibleVersion(), MIN_COMPATIBLE_CONFIG_VERSION);
        ASSERT_EQUALS(newVersion.getCurrentVersion(), CURRENT_CONFIG_VERSION);
        ASSERT_EQUALS(newVersion.getClusterId(), clusterId);
    }