Example no. 1
void ShardRegistry::_updateLookupMapsForShard_inlock(shared_ptr<Shard> shard,
                                                     const ConnectionString& newConnString) {
    auto oldConnString = shard->getConnString();
    for (const auto& host : oldConnString.getServers()) {
        _lookup.erase(host.toString());
    }

    _lookup[shard->getId()] = shard;

    if (newConnString.type() == ConnectionString::SET) {
        _rsLookup[newConnString.getSetName()] = shard;
    } else if (newConnString.type() == ConnectionString::CUSTOM) {
        // CUSTOM connection strings (i.e. "$dummy:10000") become DBDirectClient connections which
        // always return "localhost" as their response to getServerAddress().  This is just for
        // making dbtest work.
        _lookup["localhost"] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[newConnString.toString()] = shard;

    for (const HostAndPort& hostAndPort : newConnString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
    }
}
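The function above maintains one lookup table keyed three ways: by shard id, by full connection string, and by every member host. A minimal standard-library sketch of that multi-key maintenance pattern follows; the Shard struct and key choices are illustrative assumptions, not MongoDB's actual types.

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Illustrative stand-in for a shard; not MongoDB's Shard class.
struct Shard {
    std::string id;
    std::string connString;          // e.g. "rs0/h1:27017,h2:27017"
    std::vector<std::string> hosts;  // "host:port" entries
};

using Lookup = std::map<std::string, std::shared_ptr<Shard>>;

// Re-register a shard under its new addresses: drop the stale per-host
// keys first, then index it by id, by full connection string, and by each
// member host. (Example no. 3 below also erases the stale connection-string
// key, which this version leaves behind.)
void updateLookup(Lookup& lookup, const std::shared_ptr<Shard>& shard,
                  std::string newConnString,
                  std::vector<std::string> newHosts) {
    for (const auto& host : shard->hosts)
        lookup.erase(host);
    shard->connString = std::move(newConnString);
    shard->hosts = std::move(newHosts);
    lookup[shard->id] = shard;
    lookup[shard->connString] = shard;
    for (const auto& host : shard->hosts)
        lookup[host] = shard;
}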
    Status DBClientShardResolver::findMaster( const std::string connString,
                                              ConnectionString* resolvedHost ) {
        std::string errMsg;

        ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
        dassert( errMsg == "" );
        dassert( rawHost.type() == ConnectionString::SET
                 || rawHost.type() == ConnectionString::MASTER );

        if ( rawHost.type() == ConnectionString::MASTER ) {
            *resolvedHost = rawHost;
            return Status::OK();
        }

        //
        // If we need to, then get the particular node we're targeting in the replica set
        //

        // Don't create the monitor unless we need to - fast path
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());

        if (!replMonitor) {
            // Slow path
            std::set<HostAndPort> seedServers(rawHost.getServers().begin(),
                                              rawHost.getServers().end());
            ReplicaSetMonitor::createIfNeeded(rawHost.getSetName(), seedServers);
            replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());
        }

        if (!replMonitor) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawHost.getSetName() );
        }

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
            *resolvedHost = ConnectionString::parse( masterHostAndPort.toString(), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );
        }

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
    }
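findMaster only consults an existing ReplicaSetMonitor on the fast path and creates one on a miss before looking it up again. A hedged sketch of that check/create/re-check contract using only the standard library; Monitor and the registry map are stand-ins, not MongoDB's API.

#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>

struct Monitor {  // illustrative stand-in for ReplicaSetMonitor
    explicit Monitor(std::string name) : setName(std::move(name)) {}
    std::string setName;
};

std::mutex g_mutex;
std::map<std::string, std::shared_ptr<Monitor>> g_monitors;

// Fast path: return an existing monitor without creating anything.
std::shared_ptr<Monitor> getMonitor(const std::string& setName) {
    std::lock_guard<std::mutex> lk(g_mutex);
    auto it = g_monitors.find(setName);
    return it == g_monitors.end() ? nullptr : it->second;
}

// Slow path: create the monitor on a miss, re-checking under the lock so
// two concurrent callers cannot both create one.
std::shared_ptr<Monitor> getOrCreateMonitor(const std::string& setName) {
    if (auto existing = getMonitor(setName))  // fast path
        return existing;
    std::lock_guard<std::mutex> lk(g_mutex);  // slow path
    auto& slot = g_monitors[setName];
    if (!slot)
        slot = std::make_shared<Monitor>(setName);
    return slot;
}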
Example no. 3
void ShardRegistryData::_addShard(WithLock lk,
                                  std::shared_ptr<Shard> const& shard,
                                  bool useOriginalCS) {
    const ShardId shardId = shard->getId();

    const ConnectionString connString =
        useOriginalCS ? shard->originalConnString() : shard->getConnString();

    auto currentShard = _findByShardId(lk, shardId);
    if (currentShard) {
        auto oldConnString = currentShard->originalConnString();

        if (oldConnString.toString() != connString.toString()) {
            log() << "Updating ShardRegistry connection string for shard " << currentShard->getId()
                  << " from: " << oldConnString.toString() << " to: " << connString.toString();
        }

        for (const auto& host : oldConnString.getServers()) {
            _lookup.erase(host.toString());
            _hostLookup.erase(host);
        }
        _lookup.erase(oldConnString.toString());
    }

    _lookup[shard->getId()] = shard;

    LOG(3) << "Adding shard " << shard->getId() << ", with CS " << connString.toString();
    if (connString.type() == ConnectionString::SET) {
        _rsLookup[connString.getSetName()] = shard;
    } else if (connString.type() == ConnectionString::CUSTOM) {
        // CUSTOM connection strings (i.e. "$dummy:10000") become DBDirectClient connections which
        // always return "localhost" as their response to getServerAddress().  This is just for
        // making dbtest work.
        _lookup[ShardId("localhost")] = shard;
        _hostLookup[HostAndPort("localhost")] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[connString.toString()] = shard;

    for (const HostAndPort& hostAndPort : connString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
        _hostLookup[hostAndPort] = shard;
    }
}
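Unlike Example no. 1, this version also maintains a typed _hostLookup and erases the stale connection-string key. A brief sketch of keeping two parallel indexes consistent; the types are simplified and HostAndPort here is a toy stand-in.

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

// Toy stand-in for mongo's HostAndPort, ordered so it can key a std::map.
struct HostAndPort {
    std::string host;
    int port;
    bool operator<(const HostAndPort& o) const {
        return std::tie(host, port) < std::tie(o.host, o.port);
    }
    std::string toString() const { return host + ":" + std::to_string(port); }
};

struct Shard {};

std::map<std::string, std::shared_ptr<Shard>> lookup;      // string-keyed
std::map<HostAndPort, std::shared_ptr<Shard>> hostLookup;  // typed key

// Both indexes must change together, or a host would resolve in one map
// but not the other.
void reindex(const std::vector<HostAndPort>& oldHosts,
             const std::vector<HostAndPort>& newHosts,
             const std::shared_ptr<Shard>& shard) {
    for (const auto& h : oldHosts) {
        lookup.erase(h.toString());
        hostLookup.erase(h);
    }
    for (const auto& h : newHosts) {
        lookup[h.toString()] = shard;
        hostLookup[h] = shard;
    }
}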
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    // do not restart the task executor if it is in shutdown
    if (!_taskExecutor && !_isShutdown) {
        // construct task executor
        auto net = executor::makeNetworkInterface("ReplicaSetMonitor-TaskExecutor");
        auto netPtr = net.get();
        _taskExecutor = stdx::make_unique<ThreadPoolTaskExecutor>(
            stdx::make_unique<NetworkInterfaceThreadPool>(netPtr), std::move(net));
        LOG(1) << "Starting up task executor for monitoring replica sets in response to request to "
                  "monitor set: "
               << connStr.toString();
        _taskExecutor->startup();
    }

    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        return monitor;
    }

    const std::set<HostAndPort> servers(connStr.getServers().begin(), connStr.getServers().end());

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers);
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
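Here the registry holds std::weak_ptr entries, so a monitor that every client has released expires out of the map and is rebuilt on the next request. A minimal sketch of that weak_ptr-cache revival pattern; the names are illustrative.

#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>

struct Monitor {
    explicit Monitor(std::string n) : name(std::move(n)) {}
    std::string name;
};

std::mutex g_mutex;
std::map<std::string, std::weak_ptr<Monitor>> g_cache;

std::shared_ptr<Monitor> getOrCreate(const std::string& name) {
    std::lock_guard<std::mutex> lk(g_mutex);
    if (auto alive = g_cache[name].lock())  // still referenced elsewhere?
        return alive;
    auto fresh = std::make_shared<Monitor>(name);  // expired or absent: rebuild
    g_cache[name] = fresh;
    return fresh;
}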
Example no. 5
    shared_ptr<Shard> ShardRegistry::find(const string& ident) {
        string errmsg;
        ConnectionString connStr = ConnectionString::parse(ident, errmsg);
        uassert(18642,
                str::stream() << "Error parsing connection string: " << ident,
                errmsg.empty());

        if (connStr.type() == ConnectionString::SET) {
            boost::lock_guard<boost::mutex> lk(_rsMutex);
            ShardMap::iterator iter = _rsLookup.find(connStr.getSetName());

            if (iter == _rsLookup.end()) {
                return nullptr;
            }

            return iter->second;
        }
        else {
            boost::lock_guard<boost::mutex> lk(_mutex);
            ShardMap::iterator iter = _lookup.find(ident);

            if (iter == _lookup.end()) {
                return nullptr;
            }

            return iter->second;
        }
    }
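find() chooses the map, and the mutex guarding it, from the parsed connection-string type: replica sets resolve by set name, everything else by the raw identifier. A compact sketch of that dispatch, with a toy "setName/host,host" rule standing in for ConnectionString::parse.

#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Shard {};

std::mutex rsMutex, plainMutex;
std::map<std::string, std::shared_ptr<Shard>> rsLookup;     // by set name
std::map<std::string, std::shared_ptr<Shard>> plainLookup;  // by identifier

// Toy rule: "setName/host1,host2" is a replica set string.
std::shared_ptr<Shard> find(const std::string& ident) {
    auto slash = ident.find('/');
    if (slash != std::string::npos) {
        std::lock_guard<std::mutex> lk(rsMutex);
        auto it = rsLookup.find(ident.substr(0, slash));
        return it == rsLookup.end() ? nullptr : it->second;
    }
    std::lock_guard<std::mutex> lk(plainMutex);
    auto it = plainLookup.find(ident);
    return it == plainLookup.end() ? nullptr : it->second;
}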
Example no. 6
void ShardRegistry::updateReplSetHosts(const ConnectionString& newConnString) {
    invariant(newConnString.type() == ConnectionString::SET ||
              newConnString.type() == ConnectionString::CUSTOM);  // For dbtests

    // to prevent updating the config shard connection string during init
    stdx::unique_lock<stdx::mutex> lock(_reloadMutex);
    _data.rebuildShardIfExists(newConnString, _shardFactory.get());
}
Example no. 7
/**
 * Returns the currently-set config hosts for a cluster
 */
static vector<ConnectionString> getConfigHosts() {
    vector<ConnectionString> configHosts;
    ConnectionString configHostOrHosts = configServer.getConnectionString();
    if (configHostOrHosts.type() == ConnectionString::MASTER) {
        configHosts.push_back(configHostOrHosts);
    } else if (configHostOrHosts.type() == ConnectionString::SYNC) {
        vector<HostAndPort> configHPs = configHostOrHosts.getServers();
        for (vector<HostAndPort>::iterator it = configHPs.begin(); it != configHPs.end(); ++it) {
            configHosts.push_back(ConnectionString(*it));
        }
    } else {
        // This is only for tests.
        dassert(configHostOrHosts.type() == ConnectionString::CUSTOM);
        configHosts.push_back(configHostOrHosts);
    }

    return configHosts;
}
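getConfigHosts() flattens a multi-host SYNC connection string into one single-host entry per server. The same expansion in plain C++, assuming a comma-separated host list as input.

#include <sstream>
#include <string>
#include <vector>

// Split "h1:27019,h2:27019,h3:27019" into one entry per host.
std::vector<std::string> expandHosts(const std::string& hostList) {
    std::vector<std::string> hosts;
    std::istringstream in(hostList);
    std::string host;
    while (std::getline(in, host, ','))
        if (!host.empty())
            hosts.push_back(host);
    return hosts;
}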
Status CatalogManagerReplicaSet::init(const ConnectionString& configCS,
                                      std::unique_ptr<DistLockManager> distLockManager) {
    invariant(configCS.type() == ConnectionString::SET);

    _configServerConnectionString = configCS;
    _distLockManager = std::move(distLockManager);

    return Status::OK();
}
Example no. 9
Status initializeGlobalShardingState(OperationContext* txn,
                                     const ConnectionString& configCS,
                                     bool allowNetworking) {
    if (configCS.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Unrecognized connection string."};
    }

    auto network =
        executor::makeNetworkInterface("NetworkInterfaceASIO-ShardRegistry",
                                       stdx::make_unique<ShardingNetworkConnectionHook>(),
                                       stdx::make_unique<ShardingEgressMetadataHook>());
    auto networkPtr = network.get();
    auto shardRegistry(
        stdx::make_unique<ShardRegistry>(stdx::make_unique<RemoteCommandTargeterFactoryImpl>(),
                                         makeTaskExecutorPool(std::move(network)),
                                         networkPtr,
                                         makeTaskExecutor(executor::makeNetworkInterface(
                                             "NetworkInterfaceASIO-ShardRegistry-TaskExecutor")),
                                         configCS));

    auto catalogManager = makeCatalogManager(getGlobalServiceContext(),
                                             shardRegistry.get(),
                                             HostAndPort(getHostName(), serverGlobalParams.port));

    shardRegistry->startup();
    grid.init(std::move(catalogManager),
              std::move(shardRegistry),
              stdx::make_unique<ClusterCursorManager>(getGlobalServiceContext()->getClockSource()));

    while (!inShutdown()) {
        try {
            Status status = grid.catalogManager(txn)->startup(txn, allowNetworking);
            uassertStatusOK(status);

            if (serverGlobalParams.configsvrMode == CatalogManager::ConfigServerMode::NONE) {
                grid.shardRegistry()->reload(txn);
            }
            return Status::OK();
        } catch (const DBException& ex) {
            Status status = ex.toStatus();
            if (status == ErrorCodes::ReplicaSetNotFound) {
                // ReplicaSetNotFound most likely means we've been waiting for the config replica
                // set to come up for so long that the ReplicaSetMonitor stopped monitoring the set.
                // Rebuild the config shard to force the monitor to resume monitoring the config
                // servers.
                grid.shardRegistry()->rebuildConfigShard();
            }
            log() << "Error initializing sharding state, sleeping for 2 seconds and trying again"
                  << causedBy(status);
            sleepmillis(2000);
            continue;
        }
    }

    return Status::OK();
}
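The startup loop above retries forever, sleeping two seconds between attempts and running a targeted recovery step (rebuilding the config shard) after one specific error. A generic sketch of that retry shape; the operation, recovery hook, and shutdown flag are placeholders.

#include <atomic>
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>

std::atomic<bool> shuttingDown{false};  // placeholder for inShutdown()

// Retry 'op' until it succeeds or shutdown is requested; run 'recover'
// after each failure (e.g. rebuild a monitor), then back off briefly.
bool retryUntilShutdown(const std::function<bool()>& op,
                        const std::function<void()>& recover) {
    while (!shuttingDown.load()) {
        if (op())
            return true;
        recover();
        std::cerr << "operation failed, retrying in 2 seconds\n";
        std::this_thread::sleep_for(std::chrono::seconds(2));
    }
    return false;  // shut down before the operation succeeded
}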
Example no. 10
    Status DBClientShardResolver::findMaster( const std::string connString,
                                              ConnectionString* resolvedHost ) {
        std::string errMsg;

        ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
        dassert( errMsg == "" );
        dassert( rawHost.type() == ConnectionString::SET
                 || rawHost.type() == ConnectionString::MASTER );

        if ( rawHost.type() == ConnectionString::MASTER ) {
            *resolvedHost = rawHost;
            return Status::OK();
        }

        //
        // If we need to, then get the particular node we're targeting in the replica set
        //

        // Does not reload the monitor if it doesn't currently exist
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawHost.getSetName(),
                                                                   false );
        if ( !replMonitor ) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawHost.getSetName() );
        }

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
            *resolvedHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );
        }

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
    }
Status initializeGlobalShardingState(OperationContext* txn,
                                     const ConnectionString& configCS,
                                     StringData distLockProcessId,
                                     std::unique_ptr<ShardFactory> shardFactory,
                                     rpc::ShardingEgressMetadataHookBuilder hookBuilder,
                                     ShardingCatalogManagerBuilder catalogManagerBuilder) {
    if (configCS.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Unrecognized connection string."};
    }

    auto network =
        executor::makeNetworkInterface("NetworkInterfaceASIO-ShardRegistry",
                                       stdx::make_unique<ShardingNetworkConnectionHook>(),
                                       hookBuilder());
    auto networkPtr = network.get();
    auto executorPool = makeTaskExecutorPool(std::move(network), hookBuilder());
    executorPool->startup();

    auto shardRegistry(stdx::make_unique<ShardRegistry>(std::move(shardFactory), configCS));

    auto catalogClient =
        makeCatalogClient(txn->getServiceContext(), shardRegistry.get(), distLockProcessId);

    auto rawCatalogClient = catalogClient.get();

    std::unique_ptr<ShardingCatalogManager> catalogManager = catalogManagerBuilder(
        rawCatalogClient,
        makeTaskExecutor(executor::makeNetworkInterface("AddShard-TaskExecutor")));
    auto rawCatalogManager = catalogManager.get();

    grid.init(
        std::move(catalogClient),
        std::move(catalogManager),
        stdx::make_unique<CatalogCache>(),
        std::move(shardRegistry),
        stdx::make_unique<ClusterCursorManager>(getGlobalServiceContext()->getPreciseClockSource()),
        stdx::make_unique<BalancerConfiguration>(),
        std::move(executorPool),
        networkPtr);

    auto status = rawCatalogClient->startup();
    if (!status.isOK()) {
        return status;
    }

    if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        // Only config servers get a ShardingCatalogManager.
        status = rawCatalogManager->startup();
        if (!status.isOK()) {
            return status;
        }
    }

    return Status::OK();
}
Example no. 12
    shared_ptr<ReplicaSetMonitor>
    ReplicaSetMonitorManager::getOrCreateMonitor(const ConnectionString& connStr) {
        invariant(connStr.type() == ConnectionString::SET);

        stdx::lock_guard<stdx::mutex> lk(_mutex);

        shared_ptr<ReplicaSetMonitor>& monitor = _monitors[connStr.getSetName()];
        if (!monitor) {
            const std::set<HostAndPort> servers(connStr.getServers().begin(),
                                                connStr.getServers().end());

            monitor = std::make_shared<ReplicaSetMonitor>(connStr.getSetName(), servers);
        }

        return monitor;
    }
std::unique_ptr<RemoteCommandTargeter> RemoteCommandTargeterFactoryImpl::create(
    const ConnectionString& connStr) {
    switch (connStr.type()) {
        case ConnectionString::MASTER:
        case ConnectionString::CUSTOM:
            invariant(connStr.getServers().size() == 1);
            return stdx::make_unique<RemoteCommandTargeterStandalone>(connStr.getServers().front());
        case ConnectionString::SET:
            return stdx::make_unique<RemoteCommandTargeterRS>(connStr.getSetName(),
                                                              connStr.getServers());
        case ConnectionString::INVALID:
            // These connections should never be seen
            break;
    }

    MONGO_UNREACHABLE;
}
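The factory maps each connection-string type to a different targeter behind one interface. A self-contained sketch of the same shape; the interface and both implementations are simplified assumptions, not the real RemoteCommandTargeter classes.

#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

struct Targeter {  // illustrative stand-in for RemoteCommandTargeter
    virtual ~Targeter() = default;
    virtual std::string target() const = 0;
};

struct StandaloneTargeter : Targeter {
    explicit StandaloneTargeter(std::string h) : host(std::move(h)) {}
    std::string target() const override { return host; }
    std::string host;
};

struct ReplicaSetTargeter : Targeter {
    ReplicaSetTargeter(std::string set, std::vector<std::string> seeds)
        : setName(std::move(set)), hosts(std::move(seeds)) {}
    // Sketch only: assumes a non-empty seed list.
    std::string target() const override { return setName + "/" + hosts.front(); }
    std::string setName;
    std::vector<std::string> hosts;
};

enum class ConnType { kStandalone, kSet, kInvalid };

std::unique_ptr<Targeter> makeTargeter(ConnType type, std::string setName,
                                       std::vector<std::string> hosts) {
    switch (type) {
        case ConnType::kStandalone:
            return std::make_unique<StandaloneTargeter>(hosts.front());
        case ConnType::kSet:
            return std::make_unique<ReplicaSetTargeter>(std::move(setName),
                                                        std::move(hosts));
        case ConnType::kInvalid:
            break;  // should never be requested
    }
    throw std::logic_error("invalid connection type");
}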
Example no. 14
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    _setupTaskExecutorInLock(connStr.toString());
    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        uassertNotMixingSSL(monitor->getOriginalUri().getSSLMode(), transport::kGlobalSSLMode);
        return monitor;
    }

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(MongoURI(connStr));
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    _setupTaskExecutorInLock(connStr.toString());
    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        return monitor;
    }

    const std::set<HostAndPort> servers(connStr.getServers().begin(), connStr.getServers().end());

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers);
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
Example no. 16
Status initializeGlobalShardingState(const ConnectionString& configCS,
                                     uint64_t maxChunkSizeBytes,
                                     std::unique_ptr<ShardFactory> shardFactory,
                                     rpc::ShardingEgressMetadataHookBuilder hookBuilder) {
    if (configCS.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Unrecognized connection string."};
    }

    auto network =
        executor::makeNetworkInterface("NetworkInterfaceASIO-ShardRegistry",
                                       stdx::make_unique<ShardingNetworkConnectionHook>(),
                                       hookBuilder());
    auto networkPtr = network.get();
    auto executorPool = makeTaskExecutorPool(std::move(network), hookBuilder());
    executorPool->startup();

    auto shardRegistry(stdx::make_unique<ShardRegistry>(std::move(shardFactory), configCS));

    auto catalogManager = makeCatalogManager(getGlobalServiceContext(),
                                             shardRegistry.get(),
                                             HostAndPort(getHostName(), serverGlobalParams.port));

    auto rawCatalogManager = catalogManager.get();
    grid.init(
        std::move(catalogManager),
        stdx::make_unique<CatalogCache>(),
        std::move(shardRegistry),
        stdx::make_unique<ClusterCursorManager>(getGlobalServiceContext()->getPreciseClockSource()),
        stdx::make_unique<BalancerConfiguration>(maxChunkSizeBytes),
        std::move(executorPool),
        networkPtr);

    auto status = rawCatalogManager->startup();
    if (!status.isOK()) {
        return status;
    }

    return Status::OK();
}
Example no. 17
void ShardRegistry::_updateLookupMapsForShard_inlock(shared_ptr<Shard> shard,
                                                     const ConnectionString& newConnString) {
    auto oldConnString = shard->getConnString();
    for (const auto& host : oldConnString.getServers()) {
        _lookup.erase(host.toString());
    }

    _lookup[shard->getId()] = shard;

    if (newConnString.type() == ConnectionString::SET) {
        _rsLookup[newConnString.getSetName()] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[newConnString.toString()] = shard;

    for (const HostAndPort& hostAndPort : newConnString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
    }
}
Example no. 18
    Status DBClientShardResolver::chooseWriteHost( const string& shardName,
                                                   ConnectionString* shardHost ) const {

        // Declare up here for parsing later
        string errMsg;

        // Special-case for config and admin
        if ( shardName == "config" || shardName == "admin" ) {
            *shardHost = ConnectionString::parse( configServer.modelServer(), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }

        //
        // First get the information about the shard from the shard cache
        //

        // Internally uses our shard cache, does no reload
        Shard shard = Shard::findIfExists( shardName );
        if ( shard.getName() == "" ) {
            return Status( ErrorCodes::ShardNotFound,
                           string("unknown shard name ") + shardName );
        }

        ConnectionString rawShardHost = ConnectionString::parse( shard.getConnString(), errMsg );
        dassert( errMsg == "" );
        dassert( rawShardHost.type() == ConnectionString::SET
                 || rawShardHost.type() == ConnectionString::MASTER );

        if ( rawShardHost.type() == ConnectionString::MASTER ) {
            *shardHost = rawShardHost;
            return Status::OK();
        }

        //
        // If we need to, then get the particular node we're targeting in the replica set
        //

        // Does not reload the monitor if it doesn't currently exist
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawShardHost.getSetName(),
                                                                   false );
        if ( !replMonitor ) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawShardHost.getSetName() );
        }

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMaster();
            *shardHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );
        }

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
    }
    Status checkClusterMongoVersions(const ConnectionString& configLoc,
                                     const string& minMongoVersion)
    {
        scoped_ptr<ScopedDbConnection> connPtr;

        //
        // Find mongos pings in config server
        //

        try {
            connPtr.reset(new ScopedDbConnection(configLoc, 30));
            ScopedDbConnection& conn = *connPtr;
            scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(MongosType::ConfigNS,
                                                                      Query())));

            while (cursor->more()) {

                BSONObj pingDoc = cursor->next();

                MongosType ping;
                string errMsg;
                // NOTE: We don't care if the ping is invalid, legacy stuff will be
                if (!ping.parseBSON(pingDoc, &errMsg)) {
                    warning() << "could not parse ping document: " << pingDoc << causedBy(errMsg)
                              << endl;
                    continue;
                }

                string mongoVersion = "2.0";
                // Hack to determine older mongos versions from ping format
                if (ping.isWaitingSet()) mongoVersion = "2.2";
                if (ping.isMongoVersionSet() && ping.getMongoVersion() != "") {
                    mongoVersion = ping.getMongoVersion();
                }

                Date_t lastPing = ping.getPing();

                long long quietIntervalMillis = 0;
                Date_t currentJsTime = jsTime();
                if (currentJsTime >= lastPing) {
                    quietIntervalMillis = static_cast<long long>(currentJsTime - lastPing);
                }
                long long quietIntervalMins = quietIntervalMillis / (60 * 1000);

                // We assume that anything that hasn't pinged in 5 minutes is probably down
                if (quietIntervalMins >= 5) {
                    log() << "stale mongos detected " << quietIntervalMins << " minutes ago,"
                          << " network location is " << pingDoc["_id"].String()
                          << ", not checking version" << endl;
                }
                else {
                    if (versionCmp(mongoVersion, minMongoVersion) < 0) {
                        return Status(ErrorCodes::RemoteValidationError,
                                      stream() << "version " << mongoVersion << " of mongos at "
                                               << ping.getName()
                                               << " is not compatible with the config update, "
                                               << "you must wait 5 minutes "
                                               << "after shutting down a pre-" << minMongoVersion
                                               << " mongos");
                    }
                }
            }
        }
        catch (const DBException& e) {
            return e.toStatus("could not read mongos pings collection");
        }

        //
        // Load shards from config server
        //

        vector<ConnectionString> shardLocs;

        try {
            ScopedDbConnection& conn = *connPtr;
            scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(ShardType::ConfigNS,
                                                                      Query())));

            while (cursor->more()) {

                BSONObj shardDoc = cursor->next();

                ShardType shard;
                string errMsg;
                if (!shard.parseBSON(shardDoc, &errMsg) || !shard.isValid(&errMsg)) {
                    connPtr->done();
                    return Status(ErrorCodes::UnsupportedFormat,
                                  stream() << "invalid shard " << shardDoc
                                           << " read from the config server" << causedBy(errMsg));
                }

                ConnectionString shardLoc = ConnectionString::parse(shard.getHost(), errMsg);
                if (shardLoc.type() == ConnectionString::INVALID) {
                    connPtr->done();
                    return Status(ErrorCodes::UnsupportedFormat,
                                  stream() << "invalid shard host " << shard.getHost()
                                           << " read from the config server" << causedBy(errMsg));
                }

                shardLocs.push_back(shardLoc);
            }
        }
        catch (const DBException& e) {
            return e.toStatus("could not read shards collection");
        }

        connPtr->done();

        //
        // We've now got all the shard info from the config server, start contacting the shards
        // and verifying their versions.
        //

        for (vector<ConnectionString>::iterator it = shardLocs.begin(); it != shardLocs.end(); ++it)
        {
            ConnectionString& shardLoc = *it;

            vector<HostAndPort> servers = shardLoc.getServers();

            for (vector<HostAndPort>::iterator serverIt = servers.begin();
                    serverIt != servers.end(); ++serverIt)
            {
                // Note: This will *always* be a single-host connection
                ConnectionString serverLoc(*serverIt);

                log() << "checking that version of host " << serverLoc << " is compatible with " 
                      << minMongoVersion << endl;

                scoped_ptr<ScopedDbConnection> serverConnPtr;

                bool resultOk;
                BSONObj buildInfo;

                try {
                    serverConnPtr.reset(new ScopedDbConnection(serverLoc, 30));
                    ScopedDbConnection& serverConn = *serverConnPtr;

                    resultOk = serverConn->runCommand("admin",
                                                      BSON("buildInfo" << 1),
                                                      buildInfo);
                }
                catch (const DBException& e) {
                    warning() << "could not run buildInfo command on " << serverLoc.toString()
                              << causedBy(e) << ", you must manually verify this mongo server is "
                              << "offline (for at least 5 minutes) or of a version >= 2.2" << endl;
                    continue;
                }

                // TODO: Make running commands saner such that we can consolidate error handling
                if (!resultOk) {
                    return Status(ErrorCodes::UnknownError,
                                  stream() << DBClientConnection::getLastErrorString(buildInfo)
                                           << causedBy(buildInfo.toString()));
                }

                serverConnPtr->done();

                verify(buildInfo["version"].type() == String);
                string mongoVersion = buildInfo["version"].String();

                if (versionCmp(mongoVersion, minMongoVersion) < 0) {
                    return Status(ErrorCodes::RemoteValidationError,
                                  stream() << "version " << mongoVersion << " of mongo server at "
                                           << serverLoc.toString()
                                           << " is not compatible with the config update");
                }
            }
        }

        return Status::OK();
    }
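checkClusterMongoVersions gates on versionCmp, a dotted-version comparison. One plausible implementation that compares numeric components left to right; this is a sketch, not MongoDB's actual versionCmp, which may differ in detail.

#include <algorithm>
#include <cstddef>
#include <sstream>
#include <string>
#include <vector>

// Compare "2.0" vs "2.2" numerically, component by component.
// Returns <0, 0, or >0 like strcmp. Assumes purely numeric components.
int versionCmp(const std::string& a, const std::string& b) {
    auto parse = [](const std::string& v) {
        std::vector<int> parts;
        std::istringstream in(v);
        std::string piece;
        while (std::getline(in, piece, '.'))
            parts.push_back(std::stoi(piece));
        return parts;
    };
    std::vector<int> pa = parse(a), pb = parse(b);
    pa.resize(std::max(pa.size(), pb.size()), 0);  // "2" compares as "2.0"
    pb.resize(pa.size(), 0);
    for (std::size_t i = 0; i < pa.size(); ++i)
        if (pa[i] != pb[i])
            return pa[i] < pb[i] ? -1 : 1;
    return 0;
}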
StatusWith<std::string> ShardingCatalogManagerImpl::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a network
    // error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request.  Make sure to set the last optime for the client to the system last
        // optime so that we'll still wait for replication so that this state is visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case where we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica set
            // monitor registry. It needs to be cleaned up so that when a correct replica set is
            // added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient(opCtx)->getDatabase(opCtx, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'"
                                        << shardConnectionString.toString()
                                        << "'"
                                        << " because a local database '"
                                        << dbName
                                        << "' exists in another "
                                        << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // If the minimum allowed version for the cluster is 3.4, set the featureCompatibilityVersion to
    // 3.4 on the shard.
    if (serverGlobalParams.featureCompatibility.version.load() ==
        ServerGlobalParams::FeatureCompatibility::Version::k34) {
        auto versionResponse =
            _runCommandForAddShard(opCtx,
                                   targeter.get(),
                                   "admin",
                                   BSON(FeatureCompatibilityVersion::kCommandName
                                        << FeatureCompatibilityVersionCommandParser::kVersion34));
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            if (versionResponse.getStatus().code() == ErrorCodes::CommandNotFound) {
                return {ErrorCodes::OperationFailed,
                        "featureCompatibilityVersion for cluster is 3.4, cannot add a shard with "
                        "version below 3.4. See "
                        "http://dochub.mongodb.org/core/3.4-feature-compatibility."};
            }
            return versionResponse.getValue().commandStatus;
        }
    }

    if (!MONGO_FAIL_POINT(dontUpsertShardIdentityOnNewShards)) {
        auto commandRequest = createShardIdentityUpsertForAddShard(opCtx, shardType.getName());

        LOG(2) << "going to insert shardIdentity document into shard: " << shardType;

        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), "admin", commandRequest);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }

        auto commandResponse = std::move(swCommandResponse.getValue());

        BatchedCommandResponse batchResponse;
        auto batchResponseStatus =
            Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
        if (!batchResponseStatus.isOK()) {
            return batchResponseStatus;
        }
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result = Grid::get(opCtx)->catalogClient(opCtx)->insertConfigDocument(
        opCtx,
        ShardType::ConfigNS,
        shardType.toBSON(),
        ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = Grid::get(opCtx)->catalogClient(opCtx)->updateDatabase(opCtx, dbName, dbt);
        if (!status.isOK()) {
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient(opCtx)->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}
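addShard leans on MakeGuard/Dismiss so that monitor cleanup runs on every early return unless the happy path dismisses it. A minimal standard-C++ scope guard with the same semantics (simplified; MongoDB's guard utility differs in detail).

#include <functional>
#include <utility>

class ScopeGuard {
public:
    explicit ScopeGuard(std::function<void()> onExit)
        : _onExit(std::move(onExit)) {}
    ~ScopeGuard() {
        if (_active) _onExit();  // runs on every exit path...
    }
    void dismiss() { _active = false; }  // ...unless dismissed
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;

private:
    std::function<void()> _onExit;
    bool _active = true;
};

// Usage sketch: clean up unless the operation reaches its success point.
// ScopeGuard stopMonitoring([] { /* e.g. remove the replica set monitor */ });
// ... early returns here trigger the cleanup ...
// stopMonitoring.dismiss();  // success: keep the monitor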
Example no. 21
    bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
        // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
        string nameInternal;
        if ( ! name ) {
            name = &nameInternal;
        }

        ReplicaSetMonitorPtr rsMonitor;

        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two set of sanity checks: making sure adding this particular shard is consistent
        // with the replica set state (if it exists) and making sure this shards databases can be
        // brought into the grid without conflict.

        vector<string> dbNames;
        try {
            ScopedDbConnection newShardConn(servers.toString());
            newShardConn->getLastError();

            if ( newShardConn->type() == ConnectionString::SYNC ) {
                newShardConn.done();
                errMsg = "can't use sync cluster as a shard.  for replica set, have to use <setname>/<server1>,<server2>,...";
                return false;
            }
            
            BSONObj resIsMongos;
            bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos );

            // should return ok=0, cmd not found if it's a normal mongod
            if ( ok ) {
                errMsg = "can't add a mongos process as a shard";
                newShardConn.done();
                return false;
            }

            BSONObj resIsMaster;
            ok =  newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
            if ( !ok ) {
                ostringstream ss;
                ss << "failed running isMaster: " << resIsMaster;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // if the shard has only one host, make sure it is not part of a replica set
            string setName = resIsMaster["setName"].str();
            string commandSetName = servers.getSetName();
            if ( commandSetName.empty() && ! setName.empty() ) {
                ostringstream ss;
                ss << "host is part of set " << setName << ", use replica set url format <setname>/<server1>,<server2>,....";
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }
            if ( !commandSetName.empty() && setName.empty() ) {
                ostringstream ss;
                ss << "host did not return a set name, is the replica set still initializing? " << resIsMaster;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // if the shard is part of replica set, make sure it is the right one
            if ( ! commandSetName.empty() && ( commandSetName != setName ) ) {
                ostringstream ss;
                ss << "host is part of a different set: " << setName;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            if( setName.empty() ) { 
                // check this isn't a --configsvr
                BSONObj res;
                bool ok = newShardConn->runCommand("admin",BSON("replSetGetStatus"<<1),res);
                ostringstream ss;
                if( !ok && res["info"].type() == String && res["info"].String() == "configsvr" ) {
                    errMsg = "the specified mongod is a --configsvr and should thus not be a shard server";
                    newShardConn.done();
                    return false;
                }                
            }

            // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
            // the set. It is fine if not all members of the set are present in 'servers'.
            bool foundAll = true;
            string offendingHost;
            if ( ! commandSetName.empty() ) {
                set<string> hostSet;
                BSONObjIterator iter( resIsMaster["hosts"].Obj() );
                while ( iter.more() ) {
                    hostSet.insert( iter.next().String() ); // host:port
                }
                if ( resIsMaster["passives"].isABSONObj() ) {
                    BSONObjIterator piter( resIsMaster["passives"].Obj() );
                    while ( piter.more() ) {
                        hostSet.insert( piter.next().String() ); // host:port
                    }
                }
                if ( resIsMaster["arbiters"].isABSONObj() ) {
                    BSONObjIterator piter( resIsMaster["arbiters"].Obj() );
                    while ( piter.more() ) {
                        hostSet.insert( piter.next().String() ); // host:port
                    }
                }

                vector<HostAndPort> hosts = servers.getServers();
                for ( size_t i = 0 ; i < hosts.size() ; i++ ) {
                    if (!hosts[i].hasPort()) {
                        hosts[i].setPort(ServerGlobalParams::DefaultDBPort);
                    }
                    string host = hosts[i].toString(); // host:port
                    if ( hostSet.find( host ) == hostSet.end() ) {
                        offendingHost = host;
                        foundAll = false;
                        break;
                    }
                }
            }
            if ( ! foundAll ) {
                ostringstream ss;
                ss << "in seed list " << servers.toString() << ", host " << offendingHost
                   << " does not belong to replica set " << setName;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // shard name defaults to the name of the replica set
            if ( name->empty() && ! setName.empty() )
                *name = setName;

            // In order to be accepted as a new shard, that mongod must not have any database name that exists already
            // in any other shards. If that test passes, the new shard's databases are going to be entered as
            // non-sharded db's whose primary is the newly added shard.

            BSONObj resListDB;
            ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
            if ( !ok ) {
                ostringstream ss;
                ss << "failed listing " << servers.toString() << "'s databases:" << resListDB;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            BSONObjIterator i( resListDB["databases"].Obj() );
            while ( i.more() ) {
                BSONObj dbEntry = i.next().Obj();
                const string& dbName = dbEntry["name"].String();
                if ( _isSpecialLocalDB( dbName ) ) {
                    // 'local', 'admin', and 'config' are system DBs and should be excluded here
                    continue;
                }
                else {
                    dbNames.push_back( dbName );
                }
            }

            if ( newShardConn->type() == ConnectionString::SET ) 
                rsMonitor = ReplicaSetMonitor::get( setName );

            newShardConn.done();
        }
        catch ( DBException& e ) {
            if ( servers.type() == ConnectionString::SET ) {
                ReplicaSetMonitor::remove( servers.getSetName() );
            }
            ostringstream ss;
            ss << "couldn't connect to new shard ";
            ss << e.what();
            errMsg = ss.str();
            return false;
        }

        // check that none of the existing shard candidate's db's exist elsewhere
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , false );
            if ( config.get() != NULL ) {
                ostringstream ss;
                ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
                ss << "' exists in another " << config->getPrimary().toString();
                errMsg = ss.str();
                return false;
            }
        }

        // if a name for a shard wasn't provided, pick one.
        if ( name->empty() && ! _getNewShardName( name ) ) {
            errMsg = "error generating new shard name";
            return false;
        }

        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append(ShardType::name(), *name);
        b.append(ShardType::host(),
                 rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
        if (maxSize > 0) {
            b.append(ShardType::maxSize(), maxSize);
        }
        BSONObj shardDoc = b.obj();

        {
            ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

            // check whether the set of hosts (or single host) is not an already a known shard
            BSONObj old = conn->findOne(ShardType::ConfigNS,
                                        BSON(ShardType::host(servers.toString())));

            if ( ! old.isEmpty() ) {
                errMsg = "host already used";
                conn.done();
                return false;
            }
            conn.done();
        }

        log() << "going to add shard: " << shardDoc << endl;

        Status result = clusterInsert( ShardType::ConfigNS,
                                       shardDoc,
                                       WriteConcernOptions::AllConfigs,
                                       NULL );

        if ( !result.isOK() ) {
            errMsg = result.reason();
            log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
            return false;
        }

        Shard::reloadShardInfo();

        // add all databases of the new shard
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , true , *name );
            if ( ! config ) {
                log() << "adding shard " << servers << " even though could not add database " << *it << endl;
            }
        }

        // Record in changelog
        BSONObjBuilder shardDetails;
        shardDetails.append("name", *name);
        shardDetails.append("host", servers.toString());
        configServer.logChange("addShard", "", shardDetails.obj());

        return true;
    }
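Grid::addShard accepts the seed list only if every host appears among the members reported by isMaster (hosts, passives, arbiters), defaulting the port before comparing. A condensed sketch of that membership check; the default port and container shapes are assumptions.

#include <set>
#include <string>
#include <vector>

// Append the default port when a seed entry has none, then verify each
// seed against the member set the replica set reported.
bool seedsAreMembers(std::vector<std::string> seeds,
                     const std::set<std::string>& reportedMembers,
                     std::string* offendingHost) {
    const std::string kDefaultPort = ":27017";  // assumed default
    for (auto& seed : seeds) {
        if (seed.find(':') == std::string::npos)
            seed += kDefaultPort;
        if (reportedMembers.count(seed) == 0) {
            *offendingHost = seed;
            return false;
        }
    }
    return true;
}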
StatusWith<std::string> ShardingCatalogManager::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a network
    // error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request.  Make sure to set the last optime for the client to the system last
        // optime so that we'll still wait for replication so that this state is visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case where we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica set
            // monitor registry. It needs to be cleaned up so that when a correct replica set is
            // added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient()->getDatabase(
            opCtx, dbName, repl::ReadConcernLevel::kLocalReadConcern);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'"
                                        << shardConnectionString.toString()
                                        << "'"
                                        << " because a local database '"
                                        << dbName
                                        << "' exists in another "
                                        << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // Check that the shard candidate does not have a local config.system.sessions collection
    auto res = _dropSessionsCollection(opCtx, targeter);

    if (!res.isOK()) {
        return res.withContext(
            "can't add shard with a local copy of config.system.sessions, please drop this "
            "collection from the shard manually and try again.");
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // Helper function that runs a command on the to-be shard and returns the status
    auto runCmdOnNewShard = [this, &opCtx, &targeter](const BSONObj& cmd) -> Status {
        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, cmd);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }
        // Extract the first non-OK status from the command response, if there is one.
        // This is needed due to the semantics of _runCommandForAddShard.
        auto commandResponse = std::move(swCommandResponse.getValue());
        BatchedCommandResponse batchResponse;
        return Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    };

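    // Build the _addShard command for the new shard; depending on the FCV (checked below),
    // it is either sent as-is or converted into a direct shardIdentity upsert.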
    AddShard addShardCmd = add_shard_util::createAddShardCmd(opCtx, shardType.getName());

    auto addShardCmdBSON = [&]() {
        // In 4.2, use the _addShard command to add the shard, which in turn inserts a
        // shardIdentity document into the shard and triggers sharding state initialization.
        // In the unlikely scenario that there's a downgrade to 4.0 between the
        // construction of this command object and the issuing of the command
        // on the receiving shard, the user will receive a rather harmless
        // CommandNotFound error for _addShard, and can simply retry.
        if (serverGlobalParams.featureCompatibility.getVersion() ==
            ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
            // Needed for IDL toBSON method
            BSONObj passthroughFields;
            return addShardCmd.toBSON(passthroughFields);
        } else {
            // To support backwards compatibility with v4.0 shards, insert a shardIdentity document
            // directly.
            return add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
        }
    }();

    auto addShardStatus = runCmdOnNewShard(addShardCmdBSON);

    if (!addShardStatus.isOK()) {
        return addShardStatus;
    }

    {
        // Hold the fcvLock across checking the FCV, sending setFCV to the new shard, and
        // writing the entry for the new shard to config.shards. This ensures the FCV doesn't change
        // after we send setFCV to the new shard, but before we write its entry to config.shards.
        // (Note, we don't use a Global IX lock here, because we don't want to hold the global lock
        // while blocking on the network).
        invariant(!opCtx->lockState()->isLocked());
        Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);

        BSONObj setFCVCmd;
        switch (serverGlobalParams.featureCompatibility.getVersion()) {
            case ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42:
            case ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo42:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion42
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
            default:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion40
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
        }
        auto versionResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, setFCVCmd);
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            return versionResponse.getValue().commandStatus;
        }

        log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

        Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            shardType.toBSON(),
            ShardingCatalogClient::kLocalWriteConcern);
        if (!result.isOK()) {
            log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
            return result;
        }
    }

    // Add all databases which were discovered on the new shard
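    // Each discovered database is registered as non-sharded, with the new shard as its
    // primary and a freshly generated database version.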
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt(dbName, shardType.getName(), false, databaseVersion::makeNew());

        {
            const auto status = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
                opCtx,
                DatabaseType::ConfigNS,
                BSON(DatabaseType::name(dbName)),
                dbt.toBSON(),
                true,
                ShardingCatalogClient::kLocalWriteConcern);
            if (!status.isOK()) {
                log() << "adding shard " << shardConnectionString.toString()
                      << " even though could not add database " << dbName;
            }
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient()->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}
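// Checks whether the shard described by 'proposedShardConnectionString' already exists in the
// cluster. Returns the existing ShardType when an equivalent shard (same name, connection
// string type, set name and maxSize) is found, so addShard can succeed idempotently; an
// IllegalOperation error when a name or host overlaps but the options differ; and boost::none
// when no existing shard conflicts with the proposed one.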
StatusWith<boost::optional<ShardType>> ShardingCatalogManager::_checkIfShardExists(
    OperationContext* opCtx,
    const ConnectionString& proposedShardConnectionString,
    const std::string* proposedShardName,
    long long proposedShardMaxSize) {
    // Check whether any host in the connection is already part of the cluster.
    const auto existingShards = Grid::get(opCtx)->catalogClient()->getAllShards(
        opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!existingShards.isOK()) {
        return existingShards.getStatus().withContext(
            "Failed to load existing shards during addShard");
    }

    // Now check if this shard already exists - if it already exists *with the same options* then
    // the addShard request can return success early without doing anything more.
    for (const auto& existingShard : existingShards.getValue().value) {
        auto swExistingShardConnStr = ConnectionString::parse(existingShard.getHost());
        if (!swExistingShardConnStr.isOK()) {
            return swExistingShardConnStr.getStatus();
        }
        auto existingShardConnStr = std::move(swExistingShardConnStr.getValue());

        // Function for determining if the options for the shard that is being added match the
        // options of an existing shard that conflicts with it.
        auto shardsAreEquivalent = [&]() {
            if (proposedShardName && *proposedShardName != existingShard.getName()) {
                return false;
            }
            if (proposedShardConnectionString.type() != existingShardConnStr.type()) {
                return false;
            }
            if (proposedShardConnectionString.type() == ConnectionString::SET &&
                proposedShardConnectionString.getSetName() != existingShardConnStr.getSetName()) {
                return false;
            }
            if (proposedShardMaxSize != existingShard.getMaxSizeMB()) {
                return false;
            }
            return true;
        };

        if (existingShardConnStr.type() == ConnectionString::SET &&
            proposedShardConnectionString.type() == ConnectionString::SET &&
            existingShardConnStr.getSetName() == proposedShardConnectionString.getSetName()) {
            // An existing shard has the same replica set name as the shard being added.
            // If the options aren't the same, then this is an error,
            // but if the options match then the addShard operation should be immediately
            // considered a success and terminated.
            if (shardsAreEquivalent()) {
                return {existingShard};
            } else {
                return {ErrorCodes::IllegalOperation,
                        str::stream() << "A shard already exists containing the replica set '"
                                      << existingShardConnStr.getSetName()
                                      << "'"};
            }
        }

        for (const auto& existingHost : existingShardConnStr.getServers()) {
            // Look if any of the hosts in the existing shard are present within the shard trying
            // to be added.
            for (const auto& addingHost : proposedShardConnectionString.getServers()) {
                if (existingHost == addingHost) {
                    // At least one of the hosts in the shard being added already exists in an
                    // existing shard.  If the options aren't the same, then this is an error,
                    // but if the options match then the addShard operation should be immediately
                    // considered a success and terminated.
                    if (shardsAreEquivalent()) {
                        return {existingShard};
                    } else {
                        return {ErrorCodes::IllegalOperation,
                                str::stream() << "'" << addingHost.toString() << "' "
                                              << "is already a member of the existing shard '"
                                              << existingShard.getHost()
                                              << "' ("
                                              << existingShard.getName()
                                              << ")."};
                    }
                }
            }
        }

        if (proposedShardName && *proposedShardName == existingShard.getName()) {
            // If we get here then we're trying to add a shard with the same name as an existing
            // shard, but there was no overlap in the hosts between the existing shard and the
            // proposed connection string for the new shard.
            return {ErrorCodes::IllegalOperation,
                    str::stream() << "A shard named " << *proposedShardName << " already exists"};
        }
    }

    return {boost::none};
}
Example no. 24
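// Called when the connection string for a replica set shard changes (for example, when the
// ReplicaSetMonitor observes a membership change), so the registry can rebuild the cached
// Shard object around the new host list.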
void ShardRegistry::updateReplSetHosts(const ConnectionString& newConnString) {
    invariant(newConnString.type() == ConnectionString::SET ||
              newConnString.type() == ConnectionString::CUSTOM);  // For dbtests

    _data.rebuildShardIfExists(newConnString, _shardFactory.get());
}
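// An earlier variant of addShard: instead of issuing an _addShard command, it upserts the
// shardIdentity document onto the new shard directly and inserts the config.shards entry
// without the FCV handshake performed by the newer implementation above.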
StatusWith<string> ShardingCatalogManagerImpl::addShard(
    OperationContext* txn,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(txn)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    // Validate that the specified connection string can serve as a shard at all
    auto shardStatus =
        _validateHostAsShard(txn, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        // TODO: This is a workaround for the case where a bad shard was requested to be
        // added and we put that bad connection string on the global replica set
        // monitor registry. It needs to be cleaned up so that when a correct replica set
        // is added, it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return shardStatus.getStatus();
    }

    ShardType& shardType = shardStatus.getValue();

    auto dbNamesStatus = _getDBNamesListFromShard(txn, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : dbNamesStatus.getValue()) {
        auto dbt = _catalogClient->getDatabase(txn, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                          << "'"
                          << shardConnectionString.toString()
                          << "'"
                          << " because a local database '"
                          << dbName
                          << "' exists in another "
                          << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> result = _generateNewShardName(txn);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

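    // Build and validate the shardIdentity document to be upserted onto the new shard; it
    // records the config server connection string, the shard's name and the clusterId, which
    // the shard needs to initialize its sharding state.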
    ShardIdentityType shardIdentity;
    shardIdentity.setConfigsvrConnString(
        Grid::get(txn)->shardRegistry()->getConfigServerConnectionString());
    shardIdentity.setShardName(shardType.getName());
    shardIdentity.setClusterId(Grid::get(txn)->shardRegistry()->getClusterId());
    auto validateStatus = shardIdentity.validate();
    if (!validateStatus.isOK()) {
        return validateStatus;
    }

    log() << "going to insert shardIdentity document into shard: " << shardIdentity.toString();

    auto updateRequest = shardIdentity.createUpsertForAddShard();
    BatchedCommandRequest commandRequest(updateRequest.release());
    commandRequest.setNS(NamespaceString::kConfigCollectionNamespace);
    commandRequest.setWriteConcern(kMajorityWriteConcern.toBSON());

    auto swCommandResponse =
        _runCommandForAddShard(txn, targeter.get(), "admin", commandRequest.toBSON());

    if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    auto commandResponse = std::move(swCommandResponse.getValue());

    BatchedCommandResponse batchResponse;
    auto batchResponseStatus =
        Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    if (!batchResponseStatus.isOK()) {
        return batchResponseStatus;
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result =
        _catalogClient->insertConfigDocument(txn, ShardType::ConfigNS, shardType.toBSON());
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        if (result == ErrorCodes::DuplicateKey) {
            // TODO(SERVER-24213): adding a shard that already exists should be considered success,
            // however this approach does no validation that we are adding the shard with the same
            // options.  It also does not protect against adding the same shard with a different
            // shard name and slightly different connection string.  This is a temporary hack to
            // get the continuous stepdown suite passing.
            warning() << "Received duplicate key error when inserting new shard with name "
                      << shardType.getName() << " and connection string "
                      << shardConnectionString.toString()
                      << " to config.shards collection.  This most likely means that there was an "
                      "attempt to add a shard that already exists in the cluster";
            return shardType.getName();
        }
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const string& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = _catalogClient->updateDatabase(txn, dbName, dbt);
        if (!status.isOK()) {
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    _catalogClient->logChange(txn, "addShard", "", shardDetails.obj());

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(txn)->shardRegistry();
    if (!shardRegistry->getShard(txn, shardType.getName())) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    return shardType.getName();
}
Example no. 26
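// A legacy mongos-side implementation: the sanity checks and the config.shards insert are
// performed over direct ScopedDbConnection access to the candidate shard and the config
// server primary, and failures are reported through 'errMsg' instead of a Status.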
    bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
        // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
        string nameInternal;
        if ( ! name ) {
            name = &nameInternal;
        }

        ReplicaSetMonitorPtr rsMonitor;

        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two sets of sanity checks: making sure adding this particular shard is
        // consistent with the replica set state (if it exists) and making sure this shard's
        // databases can be brought into the grid without conflict.

        if ( servers.type() == ConnectionString::SYNC ) {
            errMsg = "can't use sync cluster as a shard for replica set, "
                    "have to use <setname>/<server1>,<server2>,...";
            return false;
        }

        vector<string> dbNames;
        try {
            bool ok = false;

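            // First probe: run 'isdbgrid' against the candidate. Only a mongos answers this
            // command successfully, so ok == true means the target cannot be added as a shard.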
            {
                ScopedDbConnection newShardConn(servers.toString());

                BSONObj resIsMongos;
                ok = newShardConn->runCommand( "admin", BSON( "isdbgrid" << 1 ), resIsMongos );
                newShardConn.done();
            }

            // should return ok=0, cmd not found if it's a normal mongod
            if ( ok ) {
                errMsg = "can't add a mongos process as a shard";
                return false;
            }

            if ( servers.type() == ConnectionString::SET ) {
                if (!addReplSetShardCheck( servers, &errMsg )) {
                    return false;
                }

                // shard name defaults to the name of the replica set
                if ( name->empty() && !servers.getSetName().empty() ) {
                    *name = servers.getSetName();
                }
            }

            // In order to be accepted as a new shard, that mongod must not have any database name
            // that already exists in any other shard. If that test passes, the new shard's
            // databases are going to be entered as non-sharded dbs whose primary is the
            // newly added shard.

            BSONObj resListDB;
            {
                ScopedDbConnection newShardConn(servers.toString());
                ok = newShardConn->runCommand( "admin", BSON( "listDatabases" << 1 ), resListDB );
                newShardConn.done();
            }

            if ( !ok ) {
                errMsg = str::stream() << "failed listing " << servers.toString()
                            << "'s databases:" << resListDB;
                return false;
            }

            BSONObjIterator i( resListDB["databases"].Obj() );
            while ( i.more() ) {
                BSONObj dbEntry = i.next().Obj();
                const string& dbName = dbEntry["name"].String();
                if ( _isSpecialLocalDB( dbName ) ) {
                    // 'local', 'admin', and 'config' are system DBs and should be excluded here
                    continue;
                }
                else {
                    dbNames.push_back( dbName );
                }
            }

            if ( servers.type() == ConnectionString::SET ) {
                rsMonitor = ReplicaSetMonitor::get( servers.getSetName() );
            }
        }
        catch ( DBException& e ) {
            if ( servers.type() == ConnectionString::SET ) {
                ReplicaSetMonitor::remove( servers.getSetName() );
            }

            errMsg = str::stream() << "couldn't connect to new shard " << causedBy(e);
            return false;
        }

        // check that none of the existing shard candidate's dbs exist elsewhere
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , false );
            if ( config.get() != NULL ) {
                ostringstream ss;
                ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
                ss << "' exists in another " << config->getPrimary().toString();
                errMsg = ss.str();
                return false;
            }
        }

        // if a name for a shard wasn't provided, pick one.
        if ( name->empty() && ! _getNewShardName( name ) ) {
            errMsg = "error generating new shard name";
            return false;
        }

        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append(ShardType::name(), *name);
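        // For replica sets, store the monitor's current view of the set
        // ("setName/host1,host2,...") rather than the raw user-supplied seed list, so the
        // config entry reflects live membership.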
        b.append(ShardType::host(),
                 rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
        if (maxSize > 0) {
            b.append(ShardType::maxSize(), maxSize);
        }
        BSONObj shardDoc = b.obj();

        {
            ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

            // check whether the set of hosts (or single host) is not already a known shard
            BSONObj old = conn->findOne(ShardType::ConfigNS,
                                        BSON(ShardType::host(servers.toString())));

            if ( ! old.isEmpty() ) {
                errMsg = "host already used";
                conn.done();
                return false;
            }

            log() << "going to add shard: " << shardDoc << endl;

            conn->insert(ShardType::ConfigNS , shardDoc);
            errMsg = conn->getLastError();
            if ( ! errMsg.empty() ) {
                log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
                conn.done();
                return false;
            }

            conn.done();
        }

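        // Refresh this process's view of the shards so the new entry is visible before its
        // databases are registered below.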
        Shard::reloadShardInfo();

        // add all databases of the new shard
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , true , *name );
            if ( ! config ) {
                log() << "adding shard " << servers << " even though could not add database " << *it << endl;
            }
        }

        // Record in changelog
        BSONObjBuilder shardDetails;
        shardDetails.append("name", *name);
        shardDetails.append("host", servers.toString());
        configServer.logChange("addShard", "", shardDetails.obj());

        return true;
    }