StatusWith<ShardType> ShardingCatalogManager::_validateHostAsShard(
    OperationContext* opCtx,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {
    auto swCommandResponse = _runCommandForAddShard(
        opCtx, targeter.get(), NamespaceString::kAdminDb, BSON("isMaster" << 1));
    if (swCommandResponse.getStatus() == ErrorCodes::IncompatibleServerVersion) {
        return swCommandResponse.getStatus().withReason(
            str::stream() << "Cannot add " << connectionString.toString()
                          << " as a shard because its binary version is not compatible with "
                             "the cluster's featureCompatibilityVersion.");
    } else if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    // Check for a command response error
    auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!resIsMasterStatus.isOK()) {
        return resIsMasterStatus.withContext(str::stream()
                                             << "Error running isMaster against "
                                             << targeter->connectionString().toString());
    }

    auto resIsMaster = std::move(swCommandResponse.getValue().response);

    // Fail if the node being added is a mongos.
    const std::string msg = resIsMaster.getStringField("msg");
    if (msg == "isdbgrid") {
        return {ErrorCodes::IllegalOperation, "cannot add a mongos as a shard"};
    }

    // Extract the maxWireVersion so we can verify that the node being added has a binary version
    // greater than or equal to the cluster's featureCompatibilityVersion. We expect an incompatible
    // binary node to be unable to communicate, returning an IncompatibleServerVersion error,
    // because of our internal wire version protocol. So we can safely invariant here that the node
    // is compatible.
    long long maxWireVersion;
    Status status = bsonExtractIntegerField(resIsMaster, "maxWireVersion", &maxWireVersion);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'maxWireVersion' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (serverGlobalParams.featureCompatibility.getVersion() >
        ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) {
        // If the cluster's FCV is 4.2, or upgrading to / downgrading from, the node being added
        // must be a v4.2 binary.
        invariant(maxWireVersion == WireVersion::LATEST_WIRE_VERSION);
    } else {
        // If the cluster's FCV is 4.0, the node being added must be a v4.0 or v4.2 binary.
        invariant(serverGlobalParams.featureCompatibility.getVersion() ==
                  ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40);
        invariant(maxWireVersion >= WireVersion::LATEST_WIRE_VERSION - 1);
    }

    // Check whether there is a master. If there isn't, the replica set may not have been
    // initiated. If the connection is a standalone, it will return true for isMaster.
    bool isMaster;
    status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'ismaster' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                    << connectionString.toString()
                    << " does not have a master. If this is a replica set, ensure that it has a"
                    << " healthy primary and that the set has been properly initiated."};
    }

    const std::string providedSetName = connectionString.getSetName();
    const std::string foundSetName = resIsMaster["setName"].str();

    // Make sure the specified replica set name (if any) matches the actual shard's replica set
    if (providedSetName.empty() && !foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host is part of set " << foundSetName << "; "
                              << "use replica set url format "
                              << "<setname>/<server1>,<server2>, ..."};
    }

    if (!providedSetName.empty() && foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                              << "is the replica set still initializing? "
                              << resIsMaster};
    }

    // Make sure the set name specified in the connection string matches the one that its hosts
    // actually belong to
    if (!providedSetName.empty() && (providedSetName != foundSetName)) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                              << ") does not match the actual set name "
                              << foundSetName};
    }

    // Is it a config server?
    if (resIsMaster.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard since it is a config server"};
    }

    // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
    // string are part of the set. It is fine if not all members of the set are mentioned in the
    // connection string, though.
    if (!providedSetName.empty()) {
        std::set<std::string> hostSet;

        BSONObjIterator iter(resIsMaster["hosts"].Obj());
        while (iter.more()) {
            hostSet.insert(iter.next().String());  // host:port
        }

        if (resIsMaster["passives"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["passives"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        if (resIsMaster["arbiters"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["arbiters"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        for (const auto& hostEntry : connectionString.getServers()) {
            const auto& host = hostEntry.toString();  // host:port
            if (hostSet.find(host) == hostSet.end()) {
                return {ErrorCodes::OperationFailed,
                        str::stream() << "in seed list " << connectionString.toString() << ", host "
                                      << host
                                      << " does not belong to replica set "
                                      << foundSetName
                                      << "; found "
                                      << resIsMaster.toString()};
            }
        }
    }

    std::string actualShardName;

    if (shardProposedName) {
        actualShardName = *shardProposedName;
    } else if (!foundSetName.empty()) {
        // Default it to the name of the replica set
        actualShardName = foundSetName;
    }

    // Disallow adding shard replica set with name 'config'
    if (actualShardName == NamespaceString::kConfigDb) {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Retrieve the most up-to-date connection string that we know from the replica set monitor (if
    // this is a replica set shard; otherwise it will be the same value as connectionString).
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());
    shard.setState(ShardType::ShardState::kShardAware);

    return shard;
}
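The maxWireVersion check above encodes a small compatibility table: a 4.2-level FCV (including the transitional upgrading/downgrading states) requires the latest wire version, while a fully downgraded 4.0 FCV also accepts the previous one. A standalone sketch of that decision, with hypothetical constants standing in for MongoDB's WireVersion and FCV definitions:

#include <cassert>

// Hypothetical stand-ins; the real values come from MongoDB's headers.
constexpr long long kLatestWireVersion = 8;                           // a v4.2 binary
constexpr long long kLastStableWireVersion = kLatestWireVersion - 1;  // a v4.0 binary

enum class FCV { kFullyDowngradedTo40, kDowngradingTo40, kUpgradingTo42, kFullyUpgradedTo42 };

// Mirrors the invariants in _validateHostAsShard: returns true when a node
// reporting 'maxWireVersion' may join a cluster whose FCV is 'clusterFCV'.
bool binaryCompatibleWithCluster(FCV clusterFCV, long long maxWireVersion) {
    if (clusterFCV != FCV::kFullyDowngradedTo40) {
        // FCV 4.2, or transitioning: only a v4.2 binary is acceptable.
        return maxWireVersion == kLatestWireVersion;
    }
    // FCV 4.0: both v4.0 and v4.2 binaries are acceptable.
    return maxWireVersion >= kLastStableWireVersion;
}

int main() {
    assert(binaryCompatibleWithCluster(FCV::kFullyUpgradedTo42, kLatestWireVersion));
    assert(!binaryCompatibleWithCluster(FCV::kUpgradingTo42, kLastStableWireVersion));
    assert(binaryCompatibleWithCluster(FCV::kFullyDowngradedTo40, kLastStableWireVersion));
    return 0;
}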
StatusWith<std::string> ShardingCatalogManager::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a network
    // error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request. Make sure to set the last optime for the client to the system last
        // optime so that we still wait for replication, which makes this state visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case where we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica set
            // monitor registry. It needs to be cleaned up so that when a correct replica set is
            // added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate that the specified connection string can serve as a shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient()->getDatabase(
            opCtx, dbName, repl::ReadConcernLevel::kLocalReadConcern);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'"
                                        << shardConnectionString.toString()
                                        << "'"
                                        << " because a local database '"
                                        << dbName
                                        << "' exists in another "
                                        << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // Check that the shard candidate does not have a local config.system.sessions collection
    auto res = _dropSessionsCollection(opCtx, targeter);

    if (!res.isOK()) {
        return res.withContext(
            "can't add shard with a local copy of config.system.sessions, please drop this "
            "collection from the shard manually and try again.");
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // Helper function that runs a command on the to-be-added shard and returns its status
    auto runCmdOnNewShard = [this, &opCtx, &targeter](const BSONObj& cmd) -> Status {
        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, cmd);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }
        // Grabs the underlying status from a StatusWith object by taking the first
        // non-OK status, if there is one. This is needed due to the semantics of
        // _runCommandForAddShard.
        auto commandResponse = std::move(swCommandResponse.getValue());
        BatchedCommandResponse batchResponse;
        return Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    };

    AddShard addShardCmd = add_shard_util::createAddShardCmd(opCtx, shardType.getName());

    auto addShardCmdBSON = [&]() {
        // In 4.2, use the _addShard command to add the shard, which in turn inserts a
        // shardIdentity document into the shard and triggers sharding state initialization.
        // In the unlikely scenario that there's a downgrade to 4.0 between the
        // construction of this command object and the issuing of the command
        // on the receiving shard, the user will receive a rather harmless
        // CommandNotFound error for _addShard, and can simply retry.
        if (serverGlobalParams.featureCompatibility.getVersion() ==
            ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
            // Needed for IDL toBSON method
            BSONObj passthroughFields;
            return addShardCmd.toBSON(passthroughFields);
        } else {
            // To support backwards compatibility with v4.0 shards, insert a shardIdentity document
            // directly.
            return add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
        }
    }();

    auto addShardStatus = runCmdOnNewShard(addShardCmdBSON);

    if (!addShardStatus.isOK()) {
        return addShardStatus;
    }

    {
        // Hold the fcvLock across checking the FCV, sending setFCV to the new shard, and
        // writing the entry for the new shard to config.shards. This ensures the FCV doesn't change
        // after we send setFCV to the new shard, but before we write its entry to config.shards.
        // (Note, we don't use a Global IX lock here, because we don't want to hold the global lock
        // while blocking on the network).
        invariant(!opCtx->lockState()->isLocked());
        Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);

        BSONObj setFCVCmd;
        switch (serverGlobalParams.featureCompatibility.getVersion()) {
            case ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42:
            case ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo42:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion42
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
            default:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion40
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
        }
        auto versionResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, setFCVCmd);
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            return versionResponse.getValue().commandStatus;
        }

        log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

        Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            shardType.toBSON(),
            ShardingCatalogClient::kLocalWriteConcern);
        if (!result.isOK()) {
            log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
            return result;
        }
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt(dbName, shardType.getName(), false, databaseVersion::makeNew());

        {
            const auto status = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
                opCtx,
                DatabaseType::ConfigNS,
                BSON(DatabaseType::name(dbName)),
                dbt.toBSON(),
                true,
                ShardingCatalogClient::kLocalWriteConcern);
            if (!status.isOK()) {
                log() << "adding shard " << shardConnectionString.toString()
                      << " even though could not add database " << dbName;
            }
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient()->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}
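Two details of addShard are worth calling out. First, the setFCV command sent to the new shard is chosen while holding the fcvLock, so the cluster's FCV cannot change between that command and the config.shards insert. Second, the mapping itself treats "upgrading to 4.2" as already 4.2 and every other state as 4.0. A minimal sketch of that mapping, using a hypothetical enum in place of ServerGlobalParams::FeatureCompatibility::Version:

#include <cassert>
#include <string>

// Hypothetical stand-in for the server's FCV enum.
enum class FCV { kFullyDowngradedTo40, kDowngradingTo40, kUpgradingTo42, kFullyUpgradedTo42 };

// Mirrors the switch in addShard: the new shard adopts the cluster's
// effective FCV.
std::string setFCVTargetVersion(FCV clusterFCV) {
    switch (clusterFCV) {
        case FCV::kFullyUpgradedTo42:
        case FCV::kUpgradingTo42:
            return "4.2";
        default:
            return "4.0";
    }
}

int main() {
    assert(setFCVTargetVersion(FCV::kUpgradingTo42) == "4.2");
    assert(setFCVTargetVersion(FCV::kDowngradingTo40) == "4.0");
    return 0;
}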
Example 3
    bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
        // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
        string nameInternal;
        if ( ! name ) {
            name = &nameInternal;
        }

        ReplicaSetMonitorPtr rsMonitor;

        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two sets of sanity checks: making sure adding this particular shard is consistent
        // with the replica set state (if it exists) and making sure this shard's databases can be
        // brought into the grid without conflict.

        vector<string> dbNames;
        try {
            ScopedDbConnection newShardConn(servers.toString());
            newShardConn->getLastError();

            if ( newShardConn->type() == ConnectionString::SYNC ) {
                newShardConn.done();
                errMsg = "can't use sync cluster as a shard.  for replica set, have to use <setname>/<server1>,<server2>,...";
                return false;
            }
            
            BSONObj resIsMongos;
            bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos );

            // should return ok=0, cmd not found if it's a normal mongod
            if ( ok ) {
                errMsg = "can't add a mongos process as a shard";
                newShardConn.done();
                return false;
            }

            BSONObj resIsMaster;
            ok =  newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
            if ( !ok ) {
                ostringstream ss;
                ss << "failed running isMaster: " << resIsMaster;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // if the shard has only one host, make sure it is not part of a replica set
            string setName = resIsMaster["setName"].str();
            string commandSetName = servers.getSetName();
            if ( commandSetName.empty() && ! setName.empty() ) {
                ostringstream ss;
                ss << "host is part of set " << setName << ", use replica set url format <setname>/<server1>,<server2>,....";
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }
            if ( !commandSetName.empty() && setName.empty() ) {
                ostringstream ss;
                ss << "host did not return a set name, is the replica set still initializing? " << resIsMaster;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // if the shard is part of replica set, make sure it is the right one
            if ( ! commandSetName.empty() && ( commandSetName != setName ) ) {
                ostringstream ss;
                ss << "host is part of a different set: " << setName;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            if( setName.empty() ) { 
                // check this isn't a --configsvr
                BSONObj res;
                bool ok = newShardConn->runCommand("admin",BSON("replSetGetStatus"<<1),res);
                ostringstream ss;
                if( !ok && res["info"].type() == String && res["info"].String() == "configsvr" ) {
                    errMsg = "the specified mongod is a --configsvr and should thus not be a shard server";
                    newShardConn.done();
                    return false;
                }                
            }

            // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
            // the set. It is fine if not all members of the set are present in 'servers'.
            bool foundAll = true;
            string offendingHost;
            if ( ! commandSetName.empty() ) {
                set<string> hostSet;
                BSONObjIterator iter( resIsMaster["hosts"].Obj() );
                while ( iter.more() ) {
                    hostSet.insert( iter.next().String() ); // host:port
                }
                if ( resIsMaster["passives"].isABSONObj() ) {
                    BSONObjIterator piter( resIsMaster["passives"].Obj() );
                    while ( piter.more() ) {
                        hostSet.insert( piter.next().String() ); // host:port
                    }
                }
                if ( resIsMaster["arbiters"].isABSONObj() ) {
                    BSONObjIterator piter( resIsMaster["arbiters"].Obj() );
                    while ( piter.more() ) {
                        hostSet.insert( piter.next().String() ); // host:port
                    }
                }

                vector<HostAndPort> hosts = servers.getServers();
                for ( size_t i = 0 ; i < hosts.size() ; i++ ) {
                    if (!hosts[i].hasPort()) {
                        hosts[i].setPort(ServerGlobalParams::DefaultDBPort);
                    }
                    string host = hosts[i].toString(); // host:port
                    if ( hostSet.find( host ) == hostSet.end() ) {
                        offendingHost = host;
                        foundAll = false;
                        break;
                    }
                }
            }
            if ( ! foundAll ) {
                ostringstream ss;
                ss << "in seed list " << servers.toString() << ", host " << offendingHost
                   << " does not belong to replica set " << setName;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // shard name defaults to the name of the replica set
            if ( name->empty() && ! setName.empty() )
                *name = setName;

            // In order to be accepted as a new shard, that mongod must not have any database name that exists already
            // in any other shard. If that test passes, the new shard's databases are going to be entered as
            // non-sharded dbs whose primary is the newly added shard.

            BSONObj resListDB;
            ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
            if ( !ok ) {
                ostringstream ss;
                ss << "failed listing " << servers.toString() << "'s databases:" << resListDB;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            BSONObjIterator i( resListDB["databases"].Obj() );
            while ( i.more() ) {
                BSONObj dbEntry = i.next().Obj();
                const string& dbName = dbEntry["name"].String();
                if ( _isSpecialLocalDB( dbName ) ) {
                    // 'local', 'admin', and 'config' are system DBs and should be excluded here
                    continue;
                }
                else {
                    dbNames.push_back( dbName );
                }
            }

            if ( newShardConn->type() == ConnectionString::SET ) 
                rsMonitor = ReplicaSetMonitor::get( setName );

            newShardConn.done();
        }
        catch ( DBException& e ) {
            if ( servers.type() == ConnectionString::SET ) {
                ReplicaSetMonitor::remove( servers.getSetName() );
            }
            ostringstream ss;
            ss << "couldn't connect to new shard ";
            ss << e.what();
            errMsg = ss.str();
            return false;
        }

        // check that none of the existing shard candidate's dbs exist elsewhere
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , false );
            if ( config.get() != NULL ) {
                ostringstream ss;
                ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
                ss << "' exists in another " << config->getPrimary().toString();
                errMsg = ss.str();
                return false;
            }
        }

        // if a name for a shard wasn't provided, pick one.
        if ( name->empty() && ! _getNewShardName( name ) ) {
            errMsg = "error generating new shard name";
            return false;
        }

        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append(ShardType::name(), *name);
        b.append(ShardType::host(),
                 rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
        if (maxSize > 0) {
            b.append(ShardType::maxSize(), maxSize);
        }
        BSONObj shardDoc = b.obj();

        {
            ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

            // check whether the set of hosts (or single host) is not already a known shard
            BSONObj old = conn->findOne(ShardType::ConfigNS,
                                        BSON(ShardType::host(servers.toString())));

            if ( ! old.isEmpty() ) {
                errMsg = "host already used";
                conn.done();
                return false;
            }
            conn.done();
        }

        log() << "going to add shard: " << shardDoc << endl;

        Status result = clusterInsert( ShardType::ConfigNS,
                                       shardDoc,
                                       WriteConcernOptions::AllConfigs,
                                       NULL );

        if ( !result.isOK() ) {
            errMsg = result.reason();
            log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
            return false;
        }

        Shard::reloadShardInfo();

        // add all databases of the new shard
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , true , *name );
            if ( ! config ) {
                log() << "adding shard " << servers << " even though could not add database " << *it << endl;
            }
        }

        // Record in changelog
        BSONObjBuilder shardDetails;
        shardDetails.append("name", *name);
        shardDetails.append("host", servers.toString());
        configServer.logChange("addShard", "", shardDetails.obj());

        return true;
    }
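This legacy Grid::addShard reports failures through a bool return value plus an errMsg out-parameter, whereas the ShardingCatalogManager code above returns Status/StatusWith values instead. A hypothetical adapter illustrates the relationship between the two styles:

#include <iostream>
#include <string>

// Simplified, hypothetical Status; the real mongo::Status also carries an
// ErrorCodes value.
struct Status {
    bool ok;
    std::string reason;
};

// Wraps a legacy bool + errMsg result into a Status-style value.
Status toStatus(bool succeeded, const std::string& errMsg) {
    return succeeded ? Status{true, {}} : Status{false, errMsg};
}

int main() {
    Status s = toStatus(false, "host already used");
    if (!s.ok) {
        std::cout << "addShard failed: " << s.reason << "\n";
    }
    return 0;
}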
StatusWith<boost::optional<ShardType>> ShardingCatalogManager::_checkIfShardExists(
    OperationContext* opCtx,
    const ConnectionString& proposedShardConnectionString,
    const std::string* proposedShardName,
    long long proposedShardMaxSize) {
    // Check whether any host in the connection is already part of the cluster.
    const auto existingShards = Grid::get(opCtx)->catalogClient()->getAllShards(
        opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!existingShards.isOK()) {
        return existingShards.getStatus().withContext(
            "Failed to load existing shards during addShard");
    }

    // Now check if this shard already exists - if it already exists *with the same options* then
    // the addShard request can return success early without doing anything more.
    for (const auto& existingShard : existingShards.getValue().value) {
        auto swExistingShardConnStr = ConnectionString::parse(existingShard.getHost());
        if (!swExistingShardConnStr.isOK()) {
            return swExistingShardConnStr.getStatus();
        }
        auto existingShardConnStr = std::move(swExistingShardConnStr.getValue());

        // Function for determining if the options for the shard that is being added match the
        // options of an existing shard that conflicts with it.
        auto shardsAreEquivalent = [&]() {
            if (proposedShardName && *proposedShardName != existingShard.getName()) {
                return false;
            }
            if (proposedShardConnectionString.type() != existingShardConnStr.type()) {
                return false;
            }
            if (proposedShardConnectionString.type() == ConnectionString::SET &&
                proposedShardConnectionString.getSetName() != existingShardConnStr.getSetName()) {
                return false;
            }
            if (proposedShardMaxSize != existingShard.getMaxSizeMB()) {
                return false;
            }
            return true;
        };

        if (existingShardConnStr.type() == ConnectionString::SET &&
            proposedShardConnectionString.type() == ConnectionString::SET &&
            existingShardConnStr.getSetName() == proposedShardConnectionString.getSetName()) {
            // An existing shard has the same replica set name as the shard being added.
            // If the options aren't the same, then this is an error,
            // but if the options match then the addShard operation should be immediately
            // considered a success and terminated.
            if (shardsAreEquivalent()) {
                return {existingShard};
            } else {
                return {ErrorCodes::IllegalOperation,
                        str::stream() << "A shard already exists containing the replica set '"
                                      << existingShardConnStr.getSetName()
                                      << "'"};
            }
        }

        for (const auto& existingHost : existingShardConnStr.getServers()) {
            // Check whether any of the hosts in the existing shard are present within the shard
            // being added.
            for (const auto& addingHost : proposedShardConnectionString.getServers()) {
                if (existingHost == addingHost) {
                    // At least one of the hosts in the shard being added already exists in an
                    // existing shard.  If the options aren't the same, then this is an error,
                    // but if the options match then the addShard operation should be immediately
                    // considered a success and terminated.
                    if (shardsAreEquivalent()) {
                        return {existingShard};
                    } else {
                        return {ErrorCodes::IllegalOperation,
                                str::stream() << "'" << addingHost.toString() << "' "
                                              << "is already a member of the existing shard '"
                                              << existingShard.getHost()
                                              << "' ("
                                              << existingShard.getName()
                                              << ")."};
                    }
                }
            }
        }

        if (proposedShardName && *proposedShardName == existingShard.getName()) {
            // If we get here then we're trying to add a shard with the same name as an existing
            // shard, but there was no overlap in the hosts between the existing shard and the
            // proposed connection string for the new shard.
            return {ErrorCodes::IllegalOperation,
                    str::stream() << "A shard named " << *proposedShardName << " already exists"};
        }
    }

    return {boost::none};
}
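The essence of _checkIfShardExists is that a retried addShard must be idempotent, yet it must not silently accept conflicting options. A standalone sketch of that decision, with a deliberately simplified (hypothetical) option set:

#include <cassert>
#include <optional>
#include <string>

// Simplified view of the fields compared by shardsAreEquivalent above.
struct ShardOptions {
    std::string name;
    std::string setName;  // empty for a standalone shard
    long long maxSizeMB;
};

// Returns the existing shard's name when every option matches (a no-op
// success); std::nullopt tells the caller to raise IllegalOperation.
std::optional<std::string> retryOutcome(const ShardOptions& proposed,
                                        const ShardOptions& existing) {
    if (proposed.name == existing.name && proposed.setName == existing.setName &&
        proposed.maxSizeMB == existing.maxSizeMB) {
        return existing.name;
    }
    return std::nullopt;
}

int main() {
    const ShardOptions existing{"shard0", "rs0", 0};
    assert(retryOutcome({"shard0", "rs0", 0}, existing).has_value());
    assert(!retryOutcome({"shard0", "rs0", 100}, existing).has_value());
    return 0;
}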
Example 5
    bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
        // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
        string nameInternal;
        if ( ! name ) {
            name = &nameInternal;
        }

        ReplicaSetMonitorPtr rsMonitor;

        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two sets of sanity checks: making sure adding this particular shard is consistent
        // with the replica set state (if it exists) and making sure this shard's databases can be
        // brought into the grid without conflict.

        if ( servers.type() == ConnectionString::SYNC ) {
            errMsg = "can't use sync cluster as a shard for replica set, "
                    "have to use <setname>/<server1>,<server2>,...";
            return false;
        }

        vector<string> dbNames;
        try {
            bool ok = false;

            {
                ScopedDbConnection newShardConn(servers.toString());

                BSONObj resIsMongos;
                ok = newShardConn->runCommand( "admin", BSON( "isdbgrid" << 1 ), resIsMongos );
                newShardConn.done();
            }

            // should return ok=0, cmd not found if it's a normal mongod
            if ( ok ) {
                errMsg = "can't add a mongos process as a shard";
                return false;
            }

            if ( servers.type() == ConnectionString::SET ) {
                if (!addReplSetShardCheck( servers, &errMsg )) {
                    return false;
                }

                // shard name defaults to the name of the replica set
                if ( name->empty() && !servers.getSetName().empty() ) {
                    *name = servers.getSetName();
                }
            }

            // In order to be accepted as a new shard, that mongod must not have any database name
            // that exists already in any other shard. If that test passes, the new shard's
            // databases are going to be entered as non-sharded dbs whose primary is the
            // newly added shard.

            BSONObj resListDB;
            {
                ScopedDbConnection newShardConn(servers.toString());
                ok = newShardConn->runCommand( "admin", BSON( "listDatabases" << 1 ), resListDB );
                newShardConn.done();
            }

            if ( !ok ) {
                errMsg = str::stream() << "failed listing " << servers.toString()
                            << "'s databases:" << resListDB;
                return false;
            }

            BSONObjIterator i( resListDB["databases"].Obj() );
            while ( i.more() ) {
                BSONObj dbEntry = i.next().Obj();
                const string& dbName = dbEntry["name"].String();
                if ( _isSpecialLocalDB( dbName ) ) {
                    // 'local', 'admin', and 'config' are system DBs and should be excluded here
                    continue;
                }
                else {
                    dbNames.push_back( dbName );
                }
            }

            if ( servers.type() == ConnectionString::SET ) {
                rsMonitor = ReplicaSetMonitor::get( servers.getSetName() );
            }
        }
        catch ( DBException& e ) {
            if ( servers.type() == ConnectionString::SET ) {
                ReplicaSetMonitor::remove( servers.getSetName() );
            }

            errMsg = str::stream() << "couldn't connect to new shard " << causedBy(e);
            return false;
        }

        // check that none of the existing shard candidate's dbs exist elsewhere
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , false );
            if ( config.get() != NULL ) {
                ostringstream ss;
                ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
                ss << "' exists in another " << config->getPrimary().toString();
                errMsg = ss.str();
                return false;
            }
        }

        // if a name for a shard wasn't provided, pick one.
        if ( name->empty() && ! _getNewShardName( name ) ) {
            errMsg = "error generating new shard name";
            return false;
        }

        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append(ShardType::name(), *name);
        b.append(ShardType::host(),
                 rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
        if (maxSize > 0) {
            b.append(ShardType::maxSize(), maxSize);
        }
        BSONObj shardDoc = b.obj();

        {
            ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

            // check whether the set of hosts (or single host) is not already a known shard
            BSONObj old = conn->findOne(ShardType::ConfigNS,
                                        BSON(ShardType::host(servers.toString())));

            if ( ! old.isEmpty() ) {
                errMsg = "host already used";
                conn.done();
                return false;
            }

            log() << "going to add shard: " << shardDoc << endl;

            conn->insert(ShardType::ConfigNS , shardDoc);
            errMsg = conn->getLastError();
            if ( ! errMsg.empty() ) {
                log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
                conn.done();
                return false;
            }

            conn.done();
        }

        Shard::reloadShardInfo();

        // add all databases of the new shard
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , true , *name );
            if ( ! config ) {
                log() << "adding shard " << servers << " even though could not add database " << *it << endl;
            }
        }

        // Record in changelog
        BSONObjBuilder shardDetails;
        shardDetails.append("name", *name);
        shardDetails.append("host", servers.toString());
        configServer.logChange("addShard", "", shardDetails.obj());

        return true;
    }
Example 6
    StatusWith<string> isValidShard(const string& name,
                                    const ConnectionString& shardConnectionString,
                                    ScopedDbConnection& conn) {
        if (conn->type() == ConnectionString::SYNC) {
            return Status(ErrorCodes::BadValue,
                          "can't use sync cluster as a shard; for a replica set, "
                          "you have to use <setname>/<server1>,<server2>,...");
        }

        BSONObj resIsMongos;
        // (ok == 1) implies that it is a mongos; a plain mongod fails the isdbgrid command
        if (conn->runCommand("admin", BSON("isdbgrid" << 1), resIsMongos)) {
            return Status(ErrorCodes::BadValue,
                          "can't add a mongos process as a shard");
        }

        BSONObj resIsMaster;
        if (!conn->runCommand("admin", BSON("isMaster" << 1), resIsMaster)) {
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "failed running isMaster: " << resIsMaster);
        }

        // if the shard has only one host, make sure it is not part of a replica set
        string setName = resIsMaster["setName"].str();
        string commandSetName = shardConnectionString.getSetName();
        if (commandSetName.empty() && !setName.empty()) {
            return Status(ErrorCodes::BadValue,
                          str::stream() << "host is part of set " << setName << "; "
                                        << "use replica set url format "
                                        << "<setname>/<server1>,<server2>, ...");
        }

        if (!commandSetName.empty() && setName.empty()) {
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "host did not return a set name; "
                                        << "is the replica set still initializing? "
                                        << resIsMaster);
        }

        // if the shard is part of replica set, make sure it is the right one
        if (!commandSetName.empty() && (commandSetName != setName)) {
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "host is part of a different set: " << setName);
        }

        if (setName.empty()) {
            // check this isn't a --configsvr
            BSONObj res;
            bool ok = conn->runCommand("admin",
                                       BSON("replSetGetStatus" << 1),
                                       res);
            if(!ok && res["info"].type() == String && res["info"].String() == "configsvr") {
                return Status(ErrorCodes::BadValue,
                              "the specified mongod is a --configsvr and "
                              "should thus not be a shard server");
            }
        }

        // if the shard is part of a replica set,
        // make sure all the hosts mentioned in 'shardConnectionString' are part of
        // the set. It is fine if not all members of the set are present in 'shardConnectionString'.
        bool foundAll = true;
        string offendingHost;
        if (!commandSetName.empty()) {
            set<string> hostSet;
            BSONObjIterator iter(resIsMaster["hosts"].Obj());
            while (iter.more()) {
                hostSet.insert(iter.next().String()); // host:port
            }
            if (resIsMaster["passives"].isABSONObj()) {
                BSONObjIterator piter(resIsMaster["passives"].Obj());
                while (piter.more()) {
                    hostSet.insert(piter.next().String()); // host:port
                }
            }
            if (resIsMaster["arbiters"].isABSONObj()) {
                BSONObjIterator piter(resIsMaster["arbiters"].Obj());
                while (piter.more()) {
                    hostSet.insert(piter.next().String()); // host:port
                }
            }

            vector<HostAndPort> hosts = shardConnectionString.getServers();
            for (size_t i = 0; i < hosts.size(); i++) {
                if (!hosts[i].hasPort()) {
                    // fill in the default port so the host:port comparison below matches
                    hosts[i] = HostAndPort(hosts[i].host(), ServerGlobalParams::DefaultDBPort);
                }
                string host = hosts[i].toString(); // host:port
                if (hostSet.find(host) == hostSet.end()) {
                    offendingHost = host;
                    foundAll = false;
                    break;
                }
            }
        }
        if (!foundAll) {
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "in seed list " << shardConnectionString.toString()
                                        << ", host " << offendingHost
                                        << " does not belong to replica set " << setName);
        }

        string shardName(name);
        // shard name defaults to the name of the replica set
        if (name.empty() && !setName.empty()) {
            shardName = setName;
        }

        // disallow adding shard replica set with name 'config'
        if (shardName == "config") {
            return Status(ErrorCodes::BadValue,
                          "use of shard replica set with name 'config' is not allowed");
        }

        return shardName;
    }
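Every variant of this validation applies the same seed-list rule: each host named in the connection string must appear in the union of the set's 'hosts', 'passives', and 'arbiters', while the seed list is free to omit members. A standalone sketch of just that rule:

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Each seed host must be a known member; subsets of the membership are fine.
bool seedListBelongsToSet(const std::vector<std::string>& seedHosts,
                          const std::set<std::string>& setMembers,
                          std::string* offendingHost) {
    for (const auto& host : seedHosts) {
        if (setMembers.find(host) == setMembers.end()) {
            *offendingHost = host;
            return false;
        }
    }
    return true;
}

int main() {
    const std::set<std::string> members{"a:27017", "b:27017", "c:27017"};
    std::string bad;
    std::cout << seedListBelongsToSet({"a:27017"}, members, &bad) << "\n";    // 1
    std::cout << seedListBelongsToSet({"a:27017", "x:27017"}, members, &bad)  // 0
              << " offending host: " << bad << "\n";
    return 0;
}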
StatusWith<string> ShardingCatalogManagerImpl::addShard(
    OperationContext* txn,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(txn)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    // Validate that the specified connection string can serve as a shard at all
    auto shardStatus =
        _validateHostAsShard(txn, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        // TODO: This is a workaround for the case where we could have some bad shard being
        // requested to be added and we put that bad connection string on the global replica set
        // monitor registry. It needs to be cleaned up so that when a correct replica set is added,
        // it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return shardStatus.getStatus();
    }

    ShardType& shardType = shardStatus.getValue();

    auto dbNamesStatus = _getDBNamesListFromShard(txn, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : dbNamesStatus.getValue()) {
        auto dbt = _catalogClient->getDatabase(txn, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                          << "'"
                          << shardConnectionString.toString()
                          << "'"
                          << " because a local database '"
                          << dbName
                          << "' exists in another "
                          << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> result = _generateNewShardName(txn);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    ShardIdentityType shardIdentity;
    shardIdentity.setConfigsvrConnString(
        Grid::get(txn)->shardRegistry()->getConfigServerConnectionString());
    shardIdentity.setShardName(shardType.getName());
    shardIdentity.setClusterId(Grid::get(txn)->shardRegistry()->getClusterId());
    auto validateStatus = shardIdentity.validate();
    if (!validateStatus.isOK()) {
        return validateStatus;
    }

    log() << "going to insert shardIdentity document into shard: " << shardIdentity.toString();

    auto updateRequest = shardIdentity.createUpsertForAddShard();
    BatchedCommandRequest commandRequest(updateRequest.release());
    commandRequest.setNS(NamespaceString::kConfigCollectionNamespace);
    commandRequest.setWriteConcern(kMajorityWriteConcern.toBSON());

    auto swCommandResponse =
        _runCommandForAddShard(txn, targeter.get(), "admin", commandRequest.toBSON());

    if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    auto commandResponse = std::move(swCommandResponse.getValue());

    BatchedCommandResponse batchResponse;
    auto batchResponseStatus =
        Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    if (!batchResponseStatus.isOK()) {
        return batchResponseStatus;
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result =
        _catalogClient->insertConfigDocument(txn, ShardType::ConfigNS, shardType.toBSON());
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        if (result == ErrorCodes::DuplicateKey) {
            // TODO(SERVER-24213): adding a shard that already exists should be considered success,
            // however this approach does no validation that we are adding the shard with the same
            // options.  It also does not protect against adding the same shard with a different
            // shard name and slightly different connection string.  This is a temporary hack to
            // get the continuous stepdown suite passing.
            warning() << "Received duplicate key error when inserting new shard with name "
                      << shardType.getName() << " and connection string "
                      << shardConnectionString.toString()
                      << " to config.shards collection.  This most likely means that there was an "
                      "attempt to add a shard that already exists in the cluster";
            return shardType.getName();
        }
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const string& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = _catalogClient->updateDatabase(txn, dbName, dbt);
        if (!status.isOK()) {
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    _catalogClient->logChange(txn, "addShard", "", shardDetails.obj());

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(txn)->shardRegistry();
    if (!shardRegistry->getShard(txn, shardType.getName())) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    return shardType.getName();
}
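Unlike the 4.2 path above, which wraps this step in the _addShard command, this implementation upserts the shardIdentity document into the new shard directly. A rough sketch of what that document carries and why it is validated before being sent (the validation rule shown here is an illustrative assumption, not ShardIdentityType's exact logic):

#include <string>

struct ShardIdentity {
    std::string configsvrConnString;  // how the shard reaches the config servers
    std::string shardName;            // the name chosen or generated above
    std::string clusterId;            // ties the shard to this cluster

    // All fields must be present for sharding state initialization to work.
    bool validate(std::string* errMsg) const {
        if (configsvrConnString.empty() || shardName.empty() || clusterId.empty()) {
            *errMsg = "shardIdentity document is missing a required field";
            return false;
        }
        return true;
    }
};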
Example 8
    /**
     * Performs a sanity check on the given connection string, verifying that the seed
     * list is consistent with the set's view of itself as reported by replSetGetStatus.
     */
    bool addReplSetShardCheck( const ConnectionString& servers, string* errMsg ) {
        bool ok = false;
        BSONObj replSetStat;
        try {
            ScopedDbConnection newShardConn(servers.toString());
            ok = newShardConn->runCommand( "admin", BSON( "replSetGetStatus" << 1 ),
                    replSetStat );
            newShardConn.done();
        }
        catch ( const DBException& ex ) {
            *errMsg = str::stream() << "Error encountered while checking status of "
                    << servers.toString() << ": " << causedBy( ex );
        }

        if( !ok ) {
            if ( replSetStat["info"].str() == "configsvr" ) {
                *errMsg = "the specified mongod is a --configsvr and "
                    "should thus not be a shard server";
            }
            else {
                *errMsg = str::stream() << "error encountered calling replSetGetStatus: "
                        << replSetStat;
            }

            return false;
        }

        // if the shard has only one host, make sure it is not part of a replica set
        string setName = replSetStat["set"].str();
        string commandSetName = servers.getSetName();
        if ( commandSetName.empty() && ! setName.empty() ) {
            *errMsg = str::stream() << "host is part of set: " << setName
                        << " use replica set url format <setname>/<server1>,<server2>,....";
            return false;
        }

        if ( !commandSetName.empty() && setName.empty() ) {
            *errMsg = str::stream() << "host did not return a set name, "
                        << "is the replica set still initializing?" << replSetStat;
            return false;
        }

        // if the shard is part of replica set, make sure it is the right one
        if ( ! commandSetName.empty() && ( commandSetName != setName ) ) {
            *errMsg = str::stream() << "host is part of a different set: " << setName;
            return false;
        }

        // if the shard is part of a replica set, make sure all the hosts mentioned in
        // 'servers' are part of the set. It is fine if not all members of the set
        // are present in 'servers'.
        bool foundAll = true;
        string offendingHost;
        if ( ! commandSetName.empty() ) {
            set<string> hostSet;

            BSONElement membersElem( replSetStat["members"] );
            if ( membersElem.type() == Array ) {
                BSONArrayIteratorSorted iter( BSONArray( membersElem.Obj() ));
                while ( iter.more() ) {
                    hostSet.insert( iter.next()["name"].str() ); // host:port
                }

                vector<HostAndPort> hosts = servers.getServers();
                for ( size_t i = 0 ; i < hosts.size() ; i++ ) {
                    if (!hosts[i].hasPort()) {
                        hosts[i].setPort(CmdLine::DefaultDBPort);
                    }
                    string host = hosts[i].toString(); // host:port
                    if ( hostSet.find( host ) == hostSet.end() ) {
                        offendingHost = host;
                        foundAll = false;
                        break;
                    }
                }
            }

            if ( hostSet.empty() ) {
                *errMsg = "replSetGetStatus returned an empty set. "
                        " Please wait for the set to initialize and try again.";
                return false;
            }
        }

        if ( ! foundAll ) {
            *errMsg = str::stream() << "in seed list " << servers.toString()
                            << ", host " << offendingHost
                            << " does not belong to replica set " << setName;
            return false;
        }

        return true;
    }
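
The membership walk above boils down to a single containment rule: every host in the seed list must appear among the members reported by replSetGetStatus, while the set may have members the seed list omits. A minimal standalone sketch of that rule, assuming normalized "host:port" strings (the helper name firstOffendingHost and the sample hosts are illustrative, not taken from the MongoDB sources):

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Returns the first seed-list host that the set does not report as a member,
// or an empty string if every seed is accounted for.
std::string firstOffendingHost(const std::vector<std::string>& seedList,
                               const std::set<std::string>& reportedMembers) {
    for (const auto& host : seedList) {
        if (reportedMembers.find(host) == reportedMembers.end()) {
            return host;  // named in the seed list, but unknown to the set
        }
    }
    return {};  // every seed is a reported member; extra members are fine
}

int main() {
    const std::set<std::string> members{
        "a.example:27017", "b.example:27017", "c.example:27017"};
    // A seed list may name only a subset of the members...
    std::cout << firstOffendingHost({"a.example:27017"}, members) << "\n";  // ""
    // ...but must not name hosts outside the set.
    std::cout << firstOffendingHost({"a.example:27017", "d.example:27017"}, members)
              << "\n";  // "d.example:27017"
    return 0;
}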
StatusWith<ShardType> ShardingCatalogManagerImpl::_validateHostAsShard(
    OperationContext* txn,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {
    // Check whether any host in the connection is already part of the cluster.
    Grid::get(txn)->shardRegistry()->reload(txn);
    for (const auto& hostAndPort : connectionString.getServers()) {
        std::shared_ptr<Shard> shard;
        shard = Grid::get(txn)->shardRegistry()->getShardNoReload(hostAndPort.toString());
        if (shard) {
            return {ErrorCodes::OperationFailed,
                    str::stream() << "'" << hostAndPort.toString() << "' "
                    << "is already a member of the existing shard '"
                    << shard->getConnString().toString()
                    << "' ("
                    << shard->getId()
                    << ")."};
        }
    }

    // Check for mongos and older-version mongod connections, and whether the
    // hosts in the user-specified replica set can be found.
    auto swCommandResponse =
        _runCommandForAddShard(txn, targeter.get(), "admin", BSON("isMaster" << 1));
    if (!swCommandResponse.isOK()) {
        if (swCommandResponse.getStatus() == ErrorCodes::RPCProtocolNegotiationFailed) {
            // Mongos-to-mongos commands are no longer supported in the wire protocol
            // (because mongos does not support OP_COMMAND); the same applies to a new
            // mongos talking to an old mongod, so the call fails in these cases.
            // TODO: If/when mongos ever supports opCommands, this logic will break
            // because cmdStatus will be OK.
            return {ErrorCodes::RPCProtocolNegotiationFailed,
                    str::stream() << targeter->connectionString().toString()
                    << " does not recognize the RPC protocol being used. This is"
                    << " likely because it contains a node that is a mongos or an old"
                    << " version of mongod."};
        } else {
            return swCommandResponse.getStatus();
        }
    }

    // Check for a command response error
    auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!resIsMasterStatus.isOK()) {
        return {resIsMasterStatus.code(),
                str::stream() << "Error running isMaster against "
                << targeter->connectionString().toString()
                << ": "
                << causedBy(resIsMasterStatus)};
    }

    auto resIsMaster = std::move(swCommandResponse.getValue().response);

    // Check whether there is a master. If there isn't, the replica set may not have been
    // initiated. If the connection is a standalone, it will return true for isMaster.
    bool isMaster;
    Status status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
    if (!status.isOK()) {
        return Status(status.code(),
                      str::stream() << "isMaster returned invalid 'ismaster' "
                      << "field when attempting to add "
                      << connectionString.toString()
                      << " as a shard: "
                      << status.reason());
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                << connectionString.toString()
                << " does not have a master. If this is a replica set, ensure that it has a"
                << " healthy primary and that the set has been properly initiated."};
    }

    const string providedSetName = connectionString.getSetName();
    const string foundSetName = resIsMaster["setName"].str();

    // Make sure the specified replica set name (if any) matches the actual shard's replica set
    if (providedSetName.empty() && !foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host is part of set " << foundSetName << "; "
                << "use replica set url format "
                << "<setname>/<server1>,<server2>, ..."};
    }

    if (!providedSetName.empty() && foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                << "is the replica set still initializing? "
                << resIsMaster};
    }

    // Make sure the set name specified in the connection string matches the one
    // that its hosts actually belong to
    if (!providedSetName.empty() && (providedSetName != foundSetName)) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                << ") does not match the actual set name "
                << foundSetName};
    }

    // Is it a config server?
    if (resIsMaster.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                << " as a shard since it is a config server"};
    }

    // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
    // string are part of the set. It is fine if not all members of the set are mentioned in the
    // connection string, though.
    if (!providedSetName.empty()) {
        std::set<string> hostSet;

        BSONObjIterator iter(resIsMaster["hosts"].Obj());
        while (iter.more()) {
            hostSet.insert(iter.next().String());  // host:port
        }

        if (resIsMaster["passives"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["passives"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        if (resIsMaster["arbiters"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["arbiters"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        vector<HostAndPort> hosts = connectionString.getServers();
        for (size_t i = 0; i < hosts.size(); i++) {
            const string host = hosts[i].toString();  // host:port
            if (hostSet.find(host) == hostSet.end()) {
                return {ErrorCodes::OperationFailed,
                        str::stream() << "in seed list " << connectionString.toString() << ", host "
                        << host
                        << " does not belong to replica set "
                        << foundSetName
                        << "; found "
                        << resIsMaster.toString()};
            }
        }
    }

    string actualShardName;

    if (shardProposedName) {
        actualShardName = *shardProposedName;
    } else if (!foundSetName.empty()) {
        // Default it to the name of the replica set
        actualShardName = foundSetName;
    }

    // Disallow adding shard replica set with name 'config'
    if (actualShardName == "config") {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Retrieve the most up-to-date connection string that we know from the replica set monitor
    // (if this is a replica set shard; otherwise it will be the same value as connectionString).
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());

    return shard;
}
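
The three set-name checks in _validateHostAsShard form a small decision table: a set name must be supplied exactly when the host reports one, and the two must agree. A self-contained sketch of that rule, assuming plain std::string inputs (setNamesAgree and the error texts are illustrative, not from the MongoDB sources):

#include <iostream>
#include <string>

// Mirrors the set-name agreement rules above: both names empty (standalone
// node), or both present and equal (replica set). Anything else is rejected.
bool setNamesAgree(const std::string& providedSetName,
                   const std::string& foundSetName,
                   std::string* errMsg) {
    if (providedSetName.empty() && !foundSetName.empty()) {
        *errMsg = "host is part of set " + foundSetName +
            "; use the <setname>/<host1>,<host2>,... format";
        return false;
    }
    if (!providedSetName.empty() && foundSetName.empty()) {
        *errMsg = "host did not return a set name; is the set still initializing?";
        return false;
    }
    if (!providedSetName.empty() && providedSetName != foundSetName) {
        *errMsg = "connection string names set " + providedSetName +
            " but the host reports set " + foundSetName;
        return false;
    }
    return true;  // standalone node, or matching set names
}

int main() {
    std::string err;
    std::cout << setNamesAgree("rs0", "rs0", &err) << "\n";  // 1
    std::cout << setNamesAgree("", "rs0", &err) << "\n";     // 0
    std::cout << setNamesAgree("rs0", "rs1", &err) << "\n";  // 0
    return 0;
}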
StatusWith<string> CatalogManagerCommon::addShard(OperationContext* txn,
                                                  const std::string* shardProposedName,
                                                  const ConnectionString& shardConnectionString,
                                                  const long long maxSize) {
    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        validateHostAsShard(txn, grid.shardRegistry(), shardConnectionString, shardProposedName);
    if (!shardStatus.isOK()) {
        // TODO: This is a workaround for the case where we could have some bad shard being
        // requested to be added and we put that bad connection string on the global replica set
        // monitor registry. It needs to be cleaned up so that when a correct replica set is added,
        // it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return shardStatus.getStatus();
    }

    ShardType& shardType = shardStatus.getValue();

    auto dbNamesStatus = getDBNamesListFromShard(txn, grid.shardRegistry(), shardConnectionString);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : dbNamesStatus.getValue()) {
        auto dbt = getDatabase(txn, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> result = _generateNewShardName(txn);
        if (!result.isOK()) {
            return Status(ErrorCodes::OperationFailed, "error generating new shard name");
        }

        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    log() << "going to add shard: " << shardType.toString();

    Status result = insert(txn, ShardType::ConfigNS, shardType.toBSON(), NULL);
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        return result;
    }

    // Make sure the new shard is visible
    grid.shardRegistry()->reload(txn);

    // Add all databases which were discovered on the new shard
    for (const string& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = updateDatabase(txn, dbName, dbt);
        if (!status.isOK()) {
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    logChange(txn, txn->getClient()->clientAddress(true), "addShard", "", shardDetails.obj());

    return shardType.getName();
}
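
_generateNewShardName is not part of this listing; the names it produces follow the familiar shardNNNN scheme (shard0000, shard0001, and so on). A rough sketch of that scheme, assuming the caller already knows the highest index in use (the real implementation derives it by querying the existing entries in config.shards):

#include <cstdio>
#include <iostream>
#include <string>

// Formats the next name in the shardNNNN sequence, zero-padded to four digits
// to match names like "shard0000" and "shard0001".
std::string nextShardName(int highestExistingIndex) {
    char buf[32];
    std::snprintf(buf, sizeof(buf), "shard%04d", highestExistingIndex + 1);
    return buf;
}

int main() {
    std::cout << nextShardName(-1) << "\n";  // shard0000 (no shards yet)
    std::cout << nextShardName(3) << "\n";   // shard0004
    return 0;
}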
Example No. 11
    Status DBClientShardResolver::chooseWriteHost( const string& shardName,
                                                   ConnectionString* shardHost ) const {

        // Declare up here for parsing later
        string errMsg;

        // Special-case for config and admin
        if ( shardName == "config" || shardName == "admin" ) {
            *shardHost = ConnectionString::parse( configServer.modelServer(), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }

        //
        // First get the information about the shard from the shard cache
        //

        // Internally uses our shard cache, does no reload
        Shard shard = Shard::findIfExists( shardName );
        if ( shard.getName() == "" ) {
            return Status( ErrorCodes::ShardNotFound,
                           string("unknown shard name ") + shardName );
        }

        ConnectionString rawShardHost = ConnectionString::parse( shard.getConnString(), errMsg );
        dassert( errMsg == "" );
        dassert( rawShardHost.type() == ConnectionString::SET
                 || rawShardHost.type() == ConnectionString::MASTER );

        if ( rawShardHost.type() == ConnectionString::MASTER ) {
            *shardHost = rawShardHost;
            return Status::OK();
        }

        //
        // If we need to, then get the particular node we're targeting in the replica set
        //

        // Does not reload the monitor if it doesn't currently exist
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawShardHost.getSetName(),
                                                                   false );
        if ( !replMonitor ) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawShardHost.getSetName() );
        }

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMaster();
            *shardHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );
        }

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
    }
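
chooseWriteHost resolves a shard name in three steps: a special case for "config" and "admin", then the shard cache, then the replica set monitor for the current primary. A compact sketch of that fallback chain, with std::function stand-ins for the cache and monitor lookups (every name and address below is illustrative, not from the MongoDB sources):

#include <functional>
#include <iostream>
#include <optional>
#include <string>

// Resolves a shard name to a write host by trying, in order: a special-case
// table, a cache lookup, and a primary lookup, mirroring the flow above.
std::optional<std::string> resolveWriteHost(
    const std::string& shardName,
    const std::function<std::optional<std::string>(const std::string&)>& cacheLookup,
    const std::function<std::optional<std::string>(const std::string&)>& primaryLookup) {
    if (shardName == "config" || shardName == "admin") {
        return "configsvr.example:27019";  // illustrative config server address
    }
    auto cached = cacheLookup(shardName);  // e.g. "rs0/a:27017,b:27017"
    if (!cached) {
        return std::nullopt;  // unknown shard name
    }
    if (cached->find('/') == std::string::npos) {
        return cached;  // standalone host: target it directly
    }
    return primaryLookup(*cached);  // replica set: ask the monitor for the primary
}

int main() {
    auto cache = [](const std::string& name) -> std::optional<std::string> {
        if (name == "shard0000")
            return std::string("rs0/a.example:27017,b.example:27017");
        return std::nullopt;
    };
    auto primary = [](const std::string&) -> std::optional<std::string> {
        return std::string("a.example:27017");
    };
    auto host = resolveWriteHost("shard0000", cache, primary);
    std::cout << (host ? *host : "<not found>") << "\n";  // a.example:27017
    return 0;
}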
StatusWith<std::string> ShardingCatalogManagerImpl::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check whether this shard has already been added (which can happen if an earlier attempt
    // was retried after a network error); if so, this addShard request is a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request. Make sure to set the last optime for the client to the system last
        // optime so that we still wait for replication, keeping this state visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case where we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica set
            // monitor registry. It needs to be cleaned up so that when a correct replica set is
            // added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient(opCtx)->getDatabase(opCtx, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'"
                                        << shardConnectionString.toString()
                                        << "'"
                                        << " because a local database '"
                                        << dbName
                                        << "' exists in another "
                                        << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // If the minimum allowed version for the cluster is 3.4, set the featureCompatibilityVersion to
    // 3.4 on the shard.
    if (serverGlobalParams.featureCompatibility.version.load() ==
        ServerGlobalParams::FeatureCompatibility::Version::k34) {
        auto versionResponse =
            _runCommandForAddShard(opCtx,
                                   targeter.get(),
                                   "admin",
                                   BSON(FeatureCompatibilityVersion::kCommandName
                                        << FeatureCompatibilityVersionCommandParser::kVersion34));
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            if (versionResponse.getValue().commandStatus.code() == ErrorCodes::CommandNotFound) {
                return {ErrorCodes::OperationFailed,
                        "featureCompatibilityVersion for cluster is 3.4, cannot add a shard with "
                        "version below 3.4. See "
                        "http://dochub.mongodb.org/core/3.4-feature-compatibility."};
            }
            return versionResponse.getValue().commandStatus;
        }
    }

    if (!MONGO_FAIL_POINT(dontUpsertShardIdentityOnNewShards)) {
        auto commandRequest = createShardIdentityUpsertForAddShard(opCtx, shardType.getName());

        LOG(2) << "going to insert shardIdentity document into shard: " << shardType;

        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), "admin", commandRequest);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }

        auto commandResponse = std::move(swCommandResponse.getValue());

        BatchedCommandResponse batchResponse;
        auto batchResponseStatus =
            Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
        if (!batchResponseStatus.isOK()) {
            return batchResponseStatus;
        }
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result = Grid::get(opCtx)->catalogClient(opCtx)->insertConfigDocument(
        opCtx,
        ShardType::ConfigNS,
        shardType.toBSON(),
        ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = Grid::get(opCtx)->catalogClient(opCtx)->updateDatabase(opCtx, dbName, dbt);
        if (!status.isOK()) {
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient(opCtx)->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}
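
createShardIdentityUpsertForAddShard is not reproduced in this listing. Roughly, the upsert it builds seeds the new shard's admin.system.version collection with a document identifying the cluster; a sketch of the document's shape in the 3.4 era, with placeholder values throughout:

#include <iostream>

int main() {
    // Approximate shape of the shardIdentity document upserted into the new
    // shard's admin.system.version collection (all values are placeholders).
    const char* shardIdentityDoc = R"json({
        "_id": "shardIdentity",
        "shardName": "shard0000",
        "clusterId": "ObjectId(...)",
        "configsvrConnectionString": "configRS/cfg1.example:27019,cfg2.example:27019"
    })json";
    std::cout << shardIdentityDoc << "\n";
    return 0;
}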
StatusWith<ShardType> ShardingCatalogManagerImpl::_validateHostAsShard(
    OperationContext* opCtx,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {

    // Check if the node being added is a mongos or a version of mongod too old to speak the current
    // communication protocol.
    auto swCommandResponse =
        _runCommandForAddShard(opCtx, targeter.get(), "admin", BSON("isMaster" << 1));
    if (!swCommandResponse.isOK()) {
        if (swCommandResponse.getStatus() == ErrorCodes::RPCProtocolNegotiationFailed) {
            // Mongos-to-mongos commands are no longer supported in the wire protocol
            // (because mongos does not support OP_COMMAND); the same applies to a new
            // mongos talking to an old mongod, so the call fails in these cases.
            // TODO: If/when mongos ever supports opCommands, this logic will break
            // because cmdStatus will be OK.
            return {ErrorCodes::RPCProtocolNegotiationFailed,
                    str::stream() << targeter->connectionString().toString()
                                  << " does not recognize the RPC protocol being used. This is"
                                  << " likely because it contains a node that is a mongos or an old"
                                  << " version of mongod."};
        } else {
            return swCommandResponse.getStatus();
        }
    }

    // Check for a command response error
    auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!resIsMasterStatus.isOK()) {
        return {resIsMasterStatus.code(),
                str::stream() << "Error running isMaster against "
                              << targeter->connectionString().toString()
                              << ": "
                              << causedBy(resIsMasterStatus)};
    }

    auto resIsMaster = std::move(swCommandResponse.getValue().response);

    // Check that the node being added is a new enough version.
    // If we're running this code, that means the mongos that the addShard request originated from
    // must be at least version 3.4 (since 3.2 mongoses don't know about the _configsvrAddShard
    // command).  Since it is illegal to have v3.4 mongoses with v3.2 shards, we should reject
    // adding any shards that are not v3.4.  We can determine this by checking that the
    // maxWireVersion reported in isMaster is at least COMMANDS_ACCEPT_WRITE_CONCERN.
    // TODO(SERVER-25623): This approach won't work to prevent v3.6 mongoses from adding v3.4
    // shards, so we'll have to rethink this during the 3.5 development cycle.

    long long maxWireVersion;
    Status status = bsonExtractIntegerField(resIsMaster, "maxWireVersion", &maxWireVersion);
    if (!status.isOK()) {
        return Status(status.code(),
                      str::stream() << "isMaster returned invalid 'maxWireVersion' "
                                    << "field when attempting to add "
                                    << connectionString.toString()
                                    << " as a shard: "
                                    << status.reason());
    }
    if (maxWireVersion < WireVersion::COMMANDS_ACCEPT_WRITE_CONCERN) {
        return Status(ErrorCodes::IncompatibleServerVersion,
                      str::stream() << "Cannot add " << connectionString.toString()
                                    << " as a shard because we detected a mongod with server "
                                       "version older than 3.4.0.  It is invalid to add v3.2 and "
                                       "older shards through a v3.4 mongos.");
    }


    // Check whether there is a master. If there isn't, the replica set may not have been
    // initiated. If the connection is a standalone, it will return true for isMaster.
    bool isMaster;
    status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
    if (!status.isOK()) {
        return Status(status.code(),
                      str::stream() << "isMaster returned invalid 'ismaster' "
                                    << "field when attempting to add "
                                    << connectionString.toString()
                                    << " as a shard: "
                                    << status.reason());
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                    << connectionString.toString()
                    << " does not have a master. If this is a replica set, ensure that it has a"
                    << " healthy primary and that the set has been properly initiated."};
    }

    const std::string providedSetName = connectionString.getSetName();
    const std::string foundSetName = resIsMaster["setName"].str();

    // Make sure the specified replica set name (if any) matches the actual shard's replica set
    if (providedSetName.empty() && !foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host is part of set " << foundSetName << "; "
                              << "use replica set url format "
                              << "<setname>/<server1>,<server2>, ..."};
    }

    if (!providedSetName.empty() && foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                              << "is the replica set still initializing? "
                              << resIsMaster};
    }

    // Make sure the set name specified in the connection string matches the one
    // that its hosts actually belong to
    if (!providedSetName.empty() && (providedSetName != foundSetName)) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                              << ") does not match the actual set name "
                              << foundSetName};
    }

    // Is it a config server?
    if (resIsMaster.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard since it is a config server"};
    }

    // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
    // string are part of the set. It is fine if not all members of the set are mentioned in the
    // connection string, though.
    if (!providedSetName.empty()) {
        std::set<std::string> hostSet;

        BSONObjIterator iter(resIsMaster["hosts"].Obj());
        while (iter.more()) {
            hostSet.insert(iter.next().String());  // host:port
        }

        if (resIsMaster["passives"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["passives"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        if (resIsMaster["arbiters"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["arbiters"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        for (const auto& hostEntry : connectionString.getServers()) {
            const auto& host = hostEntry.toString();  // host:port
            if (hostSet.find(host) == hostSet.end()) {
                return {ErrorCodes::OperationFailed,
                        str::stream() << "in seed list " << connectionString.toString() << ", host "
                                      << host
                                      << " does not belong to replica set "
                                      << foundSetName
                                      << "; found "
                                      << resIsMaster.toString()};
            }
        }
    }

    std::string actualShardName;

    if (shardProposedName) {
        actualShardName = *shardProposedName;
    } else if (!foundSetName.empty()) {
        // Default it to the name of the replica set
        actualShardName = foundSetName;
    }

    // Disallow adding shard replica set with name 'config'
    if (actualShardName == NamespaceString::kConfigDb) {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Retrieve the most up-to-date connection string that we know from the replica set monitor
    // (if this is a replica set shard; otherwise it will be the same value as connectionString).
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());
    shard.setState(ShardType::ShardState::kShardAware);

    return shard;
}
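
The maxWireVersion gate above reduces to a single integer comparison against the minimum wire version the cluster accepts. A standalone sketch with illustrative stand-ins for the WireVersion enum (only the relative ordering of the values matters to the check):

#include <iostream>

// Illustrative stand-ins for entries of the WireVersion enum; the check only
// relies on the values increasing monotonically across releases.
enum WireVersion {
    RELEASE_2_4_AND_BEFORE = 0,
    // ... intermediate versions elided ...
    COMMANDS_ACCEPT_WRITE_CONCERN = 5,  // the 3.4 wire protocol
};

// Mirrors the gate above: a node is addable only if it speaks at least the
// minimum wire version required by the cluster.
bool isAddableWireVersion(long long maxWireVersion) {
    return maxWireVersion >= COMMANDS_ACCEPT_WRITE_CONCERN;
}

int main() {
    std::cout << isAddableWireVersion(4) << "\n";  // 0: pre-3.4 node, rejected
    std::cout << isAddableWireVersion(5) << "\n";  // 1: 3.4 node, accepted
    return 0;
}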