Example #1
0
    void ReplicaSetMonitor::checkAll() {
        set<string> seen;

        while ( true ) {
            ReplicaSetMonitorPtr m;
            {
                for ( map<string,ReplicaSetMonitorPtr>::iterator i=_sets.begin(); i!=_sets.end(); ++i ) {
                    string name = i->first;
                    if ( seen.count( name ) )
                        continue;
                    LOG(0) << "checking replica set: " << name << endl;
                    seen.insert( name );
                    m = i->second;
                    break;
                }
            }

            if ( ! m )
                break;

            m->check();
        }


    }
Example #2
0
    void DBConnectionPool::appendInfo( BSONObjBuilder& b ) {

        int avail = 0;
        long long created = 0;


        map<ConnectionString::ConnectionType,long long> createdByType;
        
        BSONObjBuilder bb( b.subobjStart( "hosts" ) );
        {
            boost::lock_guard<boost::mutex> lk( _mutex );
            for ( PoolMap::iterator i=_pools.begin(); i!=_pools.end(); ++i ) {
                if ( i->second.numCreated() == 0 )
                    continue;

                string s = str::stream() << i->first.ident << "::" << i->first.timeout;

                BSONObjBuilder temp( bb.subobjStart( s ) );
                temp.append( "available" , i->second.numAvailable() );
                temp.appendNumber( "created" , i->second.numCreated() );
                temp.done();

                avail += i->second.numAvailable();
                created += i->second.numCreated();

                long long& x = createdByType[i->second.type()];
                x += i->second.numCreated();
            }
        }
        bb.done();
        
        // Always report all replica sets being tracked
        set<string> replicaSets = ReplicaSetMonitor::getAllTrackedSets();
        
        BSONObjBuilder setBuilder( b.subobjStart( "replicaSets" ) );
        for ( set<string>::iterator i=replicaSets.begin(); i!=replicaSets.end(); ++i ) {
            string rs = *i;
            ReplicaSetMonitorPtr m = ReplicaSetMonitor::get( rs );
            if ( ! m ) {
                warning() << "no monitor for set: " << rs << endl;
                continue;
            }
            
            BSONObjBuilder temp( setBuilder.subobjStart( rs ) );
            m->appendInfo( temp );
            temp.done();
        }
        setBuilder.done();

        {
            BSONObjBuilder temp( bb.subobjStart( "createdByType" ) );
            for ( map<ConnectionString::ConnectionType,long long>::iterator i=createdByType.begin(); i!=createdByType.end(); ++i ) {
                temp.appendNumber( ConnectionString::typeToString( i->first ) , i->second );
            }
            temp.done();
        }

        b.append( "totalAvailable" , avail );
        b.appendNumber( "totalCreated" , created );
    }
Example #3
0
    DBClientConnection* DBClientReplicaSet::selectNodeUsingTags(
            shared_ptr<ReadPreferenceSetting> readPref) {
        if (!shouldReevaluate() && checkLastHost(readPref.get())) {

            LOG( 3 ) << "dbclient_rs selecting compatible last used node " << _lastSlaveOkHost
                                << endl;

            return _lastSlaveOkConn.get();
        }

        ReplicaSetMonitorPtr monitor = _getMonitor();
        _lastSlaveOkHost = monitor->getHostOrRefresh(*readPref);

        if ( _lastSlaveOkHost.empty() ){

            LOG( 3 ) << "dbclient_rs no compatible node found" << endl;

            return NULL;
        }

        _lastReadPref = readPref;

        // Primary connection is special because it is the only connection that is
        // versioned in mongos. Therefore, we have to make sure that this object
        // maintains only one connection to the primary and use that connection
        // every time we need to talk to the primary.
        if (monitor->isPrimary(_lastSlaveOkHost)) {
            checkMaster();
            _lastSlaveOkConn = _master;
            _lastSlaveOkHost = _masterHost; // implied, but still assign just to be safe

            LOG( 3 ) << "dbclient_rs selecting primary node " << _lastSlaveOkHost << endl;

            return _master.get();
        }

        string errmsg;
        ConnectionString connStr(_lastSlaveOkHost);
        // Needs to perform a dynamic_cast because we need to set the replSet
        // callback. We should eventually not need this after we remove the
        // callback.
        DBClientConnection* newConn = dynamic_cast<DBClientConnection*>(
                connStr.connect(errmsg, _so_timeout));

        // Assert here instead of returning NULL since the contract of this method is such
        // that returning NULL means none of the nodes were good, which is not the case here.
        uassert(16532, str::stream() << "Failed to connect to " << _lastSlaveOkHost.toString(),
                newConn != NULL);

        _lastSlaveOkConn.reset(newConn);
        _lastSlaveOkConn->setReplSetClientCallback(this);
        _lastSlaveOkConn->setRunCommandHook(_runCommandHook);
        _lastSlaveOkConn->setPostRunCommandHook(_postRunCommandHook);

        _auth(_lastSlaveOkConn.get());

        LOG( 3 ) << "dbclient_rs selecting node " << _lastSlaveOkHost << endl;

        return _lastSlaveOkConn.get();
    }
Example #4
0
 // This can't throw an exception because it is called in the destructor of ScopedDbConnection
 string DBClientReplicaSet::getServerAddress() const {
     ReplicaSetMonitorPtr rsm = ReplicaSetMonitor::get( _setName, true );
     if ( !rsm ) {
         warning() << "Trying to get server address for DBClientReplicaSet, but no "
             "ReplicaSetMonitor exists for " << _setName << endl;
         return str::stream() << _setName << "/" ;
     }
     return rsm->getServerAddress();
 }
Example #5
0
 void DBClientReplicaSet::isntMaster() { 
     log() << "got not master for: " << _masterHost << endl;
     // Can't use _getMonitor because that will create a new monitor from the cached seed if
     // the monitor doesn't exist.
     ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get( _setName );
     if ( monitor ) {
         monitor->failedHost( _masterHost );
     }
     _master.reset(); 
 }
Example #6
0
void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(
    OperationContext* txn,
    PCStatePtr state,
    const ShardId& shardId,
    std::shared_ptr<Shard> primary,
    const NamespaceString& ns,
    const string& vinfo,
    std::shared_ptr<ChunkManager> manager) {
    if (manager) {
        state->manager = manager;
    } else if (primary) {
        state->primary = primary;
    }

    verify(!primary || shardId == primary->getId());

    // Setup conn
    if (!state->conn) {
        const auto shard = grid.shardRegistry()->getShard(txn, shardId);
        state->conn.reset(new ShardConnection(shard->getConnString(), ns.ns(), manager));
    }

    const DBClientBase* rawConn = state->conn->getRawConn();
    bool allowShardVersionFailure = rawConn->type() == ConnectionString::SET &&
        DBClientReplicaSet::isSecondaryQuery(_qSpec.ns(), _qSpec.query(), _qSpec.options());
    bool connIsDown = rawConn->isFailed();
    if (allowShardVersionFailure && !connIsDown) {
        // If the replica set connection believes that it has a valid primary that is up,
        // confirm that the replica set monitor agrees that the suspected primary is indeed up.
        const DBClientReplicaSet* replConn = dynamic_cast<const DBClientReplicaSet*>(rawConn);
        invariant(replConn);
        ReplicaSetMonitorPtr rsMonitor = ReplicaSetMonitor::get(replConn->getSetName());
        if (!rsMonitor->isHostUp(replConn->getSuspectedPrimaryHostAndPort())) {
            connIsDown = true;
        }
    }

    if (allowShardVersionFailure && connIsDown) {
        // If we're doing a secondary-allowed query and the primary is down, don't attempt to
        // set the shard version.

        state->conn->donotCheckVersion();

        // A side effect of this short circuiting is the mongos will not be able figure out that
        // the primary is now up on it's own and has to rely on other threads to refresh node
        // states.

        OCCASIONALLY {
            const DBClientReplicaSet* repl = dynamic_cast<const DBClientReplicaSet*>(rawConn);
            dassert(repl);
            warning() << "Primary for " << repl->getServerAddress()
                      << " was down before, bypassing setShardVersion."
                      << " The local replica set view and targeting may be stale.";
        }
    } else {
Example #7
0
    bool Shard::containsNode( const string& node ) const {
        if ( _addr == node )
            return true;

        if ( _cs.type() == ConnectionString::SET ) {
            ReplicaSetMonitorPtr rs = ReplicaSetMonitor::get( _cs.getSetName(), true );
            return rs->contains( node );
        }

        return false;
    }
Example #8
0
        BSONObj replMonitorStats(const BSONObj& a, void* data) {
            uassert(17134, "replMonitorStats requires a single string argument (the ReplSet name)",
                    a.nFields() == 1 && a.firstElement().type() == String);

            ReplicaSetMonitorPtr rsm = ReplicaSetMonitor::get(a.firstElement().valuestrsafe(),true);
            if (!rsm) {
                return BSON("" << "no ReplSetMonitor exists by that name");
            }
            BSONObjBuilder result;
            rsm->appendInfo(result);
            return result.obj();
        }
Example #9
0
void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(
    OperationContext* txn,
    PCStatePtr state,
    const ShardId& shardId,
    std::shared_ptr<Shard> primary,
    const NamespaceString& ns,
    const string& vinfo,
    std::shared_ptr<ChunkManager> manager) {
    if (manager) {
        state->manager = manager;
    } else if (primary) {
        state->primary = primary;
    }

    verify(!primary || shardId == primary->getId());

    // Setup conn
    if (!state->conn) {
        const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId));
        state->conn.reset(new ShardConnection(shard->getConnString(), ns.ns(), manager));
    }

    const DBClientBase* rawConn = state->conn->getRawConn();
    bool allowShardVersionFailure = rawConn->type() == ConnectionString::SET &&
        DBClientReplicaSet::isSecondaryQuery(_qSpec.ns(), _qSpec.query(), _qSpec.options());

    // Skip shard version checking if primary is known to be down.
    if (allowShardVersionFailure) {
        const DBClientReplicaSet* replConn = dynamic_cast<const DBClientReplicaSet*>(rawConn);
        invariant(replConn);
        ReplicaSetMonitorPtr rsMonitor = ReplicaSetMonitor::get(replConn->getSetName());
        uassert(16388,
                str::stream() << "cannot access unknown replica set: " << replConn->getSetName(),
                rsMonitor != nullptr);
        if (!rsMonitor->isKnownToHaveGoodPrimary()) {
            state->conn->donotCheckVersion();

            // A side effect of this short circuiting is the mongos will not be able figure out
            // that the primary is now up on it's own and has to rely on other threads to refresh
            // node states.

            OCCASIONALLY {
                const DBClientReplicaSet* repl = dynamic_cast<const DBClientReplicaSet*>(rawConn);
                dassert(repl);
                warning() << "Primary for " << repl->getServerAddress()
                          << " was down before, bypassing setShardVersion."
                          << " The local replica set view and targeting may be stale.";
            }

            return;
        }
    }
    Status DBClientShardResolver::findMaster( const std::string connString,
                                              ConnectionString* resolvedHost ) {
        std::string errMsg;

        ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
        dassert( errMsg == "" );
        dassert( rawHost.type() == ConnectionString::SET
                 || rawHost.type() == ConnectionString::MASTER );

        if ( rawHost.type() == ConnectionString::MASTER ) {
            *resolvedHost = rawHost;
            return Status::OK();
        }

        //
        // If we need to, then get the particular node we're targeting in the replica set
        //

        // Don't create the monitor unless we need to - fast path
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());

        if (!replMonitor) {
            // Slow path
            std::set<HostAndPort> seedServers(rawHost.getServers().begin(),
                                              rawHost.getServers().end());
            ReplicaSetMonitor::createIfNeeded(rawHost.getSetName(), seedServers);
            replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());
        }

        if (!replMonitor) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawHost.getSetName() );
        }

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
            *resolvedHost = ConnectionString::parse( masterHostAndPort.toString(), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );
        }

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
    }
Example #11
0
    DBClientConnection * DBClientReplicaSet::checkMaster() {
        ReplicaSetMonitorPtr monitor = _getMonitor();
        HostAndPort h = monitor->getMasterOrUassert();

        if ( h == _masterHost && _master ) {
            // a master is selected.  let's just make sure connection didn't die
            if ( ! _master->isFailed() )
                return _master.get();

            monitor->failedHost( _masterHost );
            h = monitor->getMasterOrUassert(); // old master failed, try again.
        }

        _masterHost = h;

        ConnectionString connStr(_masterHost);

        string errmsg;
        DBClientConnection* newConn = NULL;

        try {
            // Needs to perform a dynamic_cast because we need to set the replSet
            // callback. We should eventually not need this after we remove the
            // callback.
            newConn = dynamic_cast<DBClientConnection*>(
                    connStr.connect(errmsg, _so_timeout));
        }
        catch (const AssertionException& ex) {
            errmsg = ex.toString();
        }

        if (newConn == NULL || !errmsg.empty()) {
            monitor->failedHost(_masterHost);
            uasserted(13639, str::stream() << "can't connect to new replica set master ["
                      << _masterHost.toString() << "]"
                      << (errmsg.empty()? "" : ", err: ") << errmsg);
        }

        _master.reset(newConn);
        _master->setReplSetClientCallback(this);
        _master->setRunCommandHook(_runCommandHook);
        _master->setPostRunCommandHook(_postRunCommandHook);

        _auth( _master.get() );
        return _master.get();
    }
Example #12
0
    bool Shard::containsNode( const string& node ) const {
        if ( _addr == node )
            return true;

        if ( _cs.type() == ConnectionString::SET ) {
            ReplicaSetMonitorPtr rs = ReplicaSetMonitor::get( _cs.getSetName(), true );

            if (!rs) {
                // Possibly still yet to be initialized. See SERVER-8194.
                warning() << "Monitor not found for a known shard: " << _cs.getSetName() << endl;
                return false;
            }

            return rs->contains( node );
        }

        return false;
    }
Example #13
0
    Status DBClientShardResolver::findMaster( const std::string connString,
                                              ConnectionString* resolvedHost ) {
        std::string errMsg;

        ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
        dassert( errMsg == "" );
        dassert( rawHost.type() == ConnectionString::SET
                 || rawHost.type() == ConnectionString::MASTER );

        if ( rawHost.type() == ConnectionString::MASTER ) {
            *resolvedHost = rawHost;
            return Status::OK();
        }

        //
        // If we need to, then get the particular node we're targeting in the replica set
        //

        // Does not reload the monitor if it doesn't currently exist
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawHost.getSetName(),
                                                                   false );
        if ( !replMonitor ) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawHost.getSetName() );
        }

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
            *resolvedHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );
        }

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
    }
Example #14
0
void DBClientConnection::handleNotMasterResponse(const BSONObj& replyBody,
                                                 StringData errorMsgFieldName) {
    const BSONElement errorMsgElem = replyBody[errorMsgFieldName];
    const BSONElement codeElem = replyBody["code"];

    if (!isNotMasterErrorString(errorMsgElem) &&
        !ErrorCodes::isNotMasterError(ErrorCodes::Error(codeElem.numberInt()))) {
        return;
    }

    ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get(_parentReplSetName);
    if (monitor) {
        monitor->failedHost(_serverAddress,
                            {ErrorCodes::NotMaster,
                             str::stream() << "got not master from: " << _serverAddress
                                           << " of repl set: "
                                           << _parentReplSetName});
    }

    _markFailed(kSetFlag);
}
// Ensure nothing breaks when out-of-band failedHost is called during scan
TEST(ReplicaSetMonitorTests, OutOfBandFailedHost) {
    SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet);
    ReplicaSetMonitorPtr rsm = boost::make_shared<ReplicaSetMonitor>(state);
    Refresher refresher = rsm->startOrContinueRefresh();

    for (size_t i = 0; i != basicSeeds.size(); ++i) {
        NextStep ns = refresher.getNextStep();
    }

    for (size_t i = 0; i != basicSeeds.size(); ++i) {
        bool primary = (i == 0);

        refresher.receivedIsMaster(basicSeeds[i], -1, BSON(
                                       "setName" << "name"
                                       << "ismaster" << primary
                                       << "secondary" << !primary
                                       << "hosts" << BSON_ARRAY("a" << "b" << "c")
                                       << "ok" << true
                                   ));

        if (i >= 1) {
            HostAndPort a("a");
            rsm->failedHost(a);
            Node* node = state->findNode(a);
            ASSERT(node);
            ASSERT(!node->isUp);
            ASSERT(!node->isMaster);
        }
        else {
            Node* node = state->findNode(HostAndPort("a"));
            ASSERT(node);
            ASSERT(node->isUp);
            ASSERT(node->isMaster);
        }
    }
}
Example #16
0
    bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
        // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
        string nameInternal;
        if ( ! name ) {
            name = &nameInternal;
        }

        ReplicaSetMonitorPtr rsMonitor;

        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two set of sanity checks: making sure adding this particular shard is consistent
        // with the replica set state (if it exists) and making sure this shards databases can be
        // brought into the grid without conflict.

        vector<string> dbNames;
        try {
            ScopedDbConnection newShardConn(servers.toString());
            newShardConn->getLastError();

            if ( newShardConn->type() == ConnectionString::SYNC ) {
                newShardConn.done();
                errMsg = "can't use sync cluster as a shard.  for replica set, have to use <setname>/<server1>,<server2>,...";
                return false;
            }
            
            BSONObj resIsMongos;
            bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos );

            // should return ok=0, cmd not found if it's a normal mongod
            if ( ok ) {
                errMsg = "can't add a mongos process as a shard";
                newShardConn.done();
                return false;
            }

            BSONObj resIsMaster;
            ok =  newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
            if ( !ok ) {
                ostringstream ss;
                ss << "failed running isMaster: " << resIsMaster;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // if the shard has only one host, make sure it is not part of a replica set
            string setName = resIsMaster["setName"].str();
            string commandSetName = servers.getSetName();
            if ( commandSetName.empty() && ! setName.empty() ) {
                ostringstream ss;
                ss << "host is part of set " << setName << ", use replica set url format <setname>/<server1>,<server2>,....";
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }
            if ( !commandSetName.empty() && setName.empty() ) {
                ostringstream ss;
                ss << "host did not return a set name, is the replica set still initializing? " << resIsMaster;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // if the shard is part of replica set, make sure it is the right one
            if ( ! commandSetName.empty() && ( commandSetName != setName ) ) {
                ostringstream ss;
                ss << "host is part of a different set: " << setName;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            if( setName.empty() ) { 
                // check this isn't a --configsvr
                BSONObj res;
                bool ok = newShardConn->runCommand("admin",BSON("replSetGetStatus"<<1),res);
                ostringstream ss;
                if( !ok && res["info"].type() == String && res["info"].String() == "configsvr" ) {
                    errMsg = "the specified mongod is a --configsvr and should thus not be a shard server";
                    newShardConn.done();
                    return false;
                }                
            }

            // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
            // the set. It is fine if not all members of the set are present in 'servers'.
            bool foundAll = true;
            string offendingHost;
            if ( ! commandSetName.empty() ) {
                set<string> hostSet;
                BSONObjIterator iter( resIsMaster["hosts"].Obj() );
                while ( iter.more() ) {
                    hostSet.insert( iter.next().String() ); // host:port
                }
                if ( resIsMaster["passives"].isABSONObj() ) {
                    BSONObjIterator piter( resIsMaster["passives"].Obj() );
                    while ( piter.more() ) {
                        hostSet.insert( piter.next().String() ); // host:port
                    }
                }
                if ( resIsMaster["arbiters"].isABSONObj() ) {
                    BSONObjIterator piter( resIsMaster["arbiters"].Obj() );
                    while ( piter.more() ) {
                        hostSet.insert( piter.next().String() ); // host:port
                    }
                }

                vector<HostAndPort> hosts = servers.getServers();
                for ( size_t i = 0 ; i < hosts.size() ; i++ ) {
                    if (!hosts[i].hasPort()) {
                        hosts[i].setPort(ServerGlobalParams::DefaultDBPort);
                    }
                    string host = hosts[i].toString(); // host:port
                    if ( hostSet.find( host ) == hostSet.end() ) {
                        offendingHost = host;
                        foundAll = false;
                        break;
                    }
                }
            }
            if ( ! foundAll ) {
                ostringstream ss;
                ss << "in seed list " << servers.toString() << ", host " << offendingHost
                   << " does not belong to replica set " << setName;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            // shard name defaults to the name of the replica set
            if ( name->empty() && ! setName.empty() )
                *name = setName;

            // In order to be accepted as a new shard, that mongod must not have any database name that exists already
            // in any other shards. If that test passes, the new shard's databases are going to be entered as
            // non-sharded db's whose primary is the newly added shard.

            BSONObj resListDB;
            ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
            if ( !ok ) {
                ostringstream ss;
                ss << "failed listing " << servers.toString() << "'s databases:" << resListDB;
                errMsg = ss.str();
                newShardConn.done();
                return false;
            }

            BSONObjIterator i( resListDB["databases"].Obj() );
            while ( i.more() ) {
                BSONObj dbEntry = i.next().Obj();
                const string& dbName = dbEntry["name"].String();
                if ( _isSpecialLocalDB( dbName ) ) {
                    // 'local', 'admin', and 'config' are system DBs and should be excluded here
                    continue;
                }
                else {
                    dbNames.push_back( dbName );
                }
            }

            if ( newShardConn->type() == ConnectionString::SET ) 
                rsMonitor = ReplicaSetMonitor::get( setName );

            newShardConn.done();
        }
        catch ( DBException& e ) {
            if ( servers.type() == ConnectionString::SET ) {
                ReplicaSetMonitor::remove( servers.getSetName() );
            }
            ostringstream ss;
            ss << "couldn't connect to new shard ";
            ss << e.what();
            errMsg = ss.str();
            return false;
        }

        // check that none of the existing shard candidate's db's exist elsewhere
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , false );
            if ( config.get() != NULL ) {
                ostringstream ss;
                ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
                ss << "' exists in another " << config->getPrimary().toString();
                errMsg = ss.str();
                return false;
            }
        }

        // if a name for a shard wasn't provided, pick one.
        if ( name->empty() && ! _getNewShardName( name ) ) {
            errMsg = "error generating new shard name";
            return false;
        }

        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append(ShardType::name(), *name);
        b.append(ShardType::host(),
                 rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
        if (maxSize > 0) {
            b.append(ShardType::maxSize(), maxSize);
        }
        BSONObj shardDoc = b.obj();

        {
            ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

            // check whether the set of hosts (or single host) is not an already a known shard
            BSONObj old = conn->findOne(ShardType::ConfigNS,
                                        BSON(ShardType::host(servers.toString())));

            if ( ! old.isEmpty() ) {
                errMsg = "host already used";
                conn.done();
                return false;
            }
            conn.done();
        }

        log() << "going to add shard: " << shardDoc << endl;

        Status result = clusterInsert( ShardType::ConfigNS,
                                       shardDoc,
                                       WriteConcernOptions::AllConfigs,
                                       NULL );

        if ( !result.isOK() ) {
            errMsg = result.reason();
            log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
            return false;
        }

        Shard::reloadShardInfo();

        // add all databases of the new shard
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , true , *name );
            if ( ! config ) {
                log() << "adding shard " << servers << " even though could not add database " << *it << endl;
            }
        }

        // Record in changelog
        BSONObjBuilder shardDetails;
        shardDetails.append("name", *name);
        shardDetails.append("host", servers.toString());
        configServer.logChange("addShard", "", shardDetails.obj());

        return true;
    }
    Status DBClientShardResolver::chooseWriteHost( const string& shardName,
                                                   ConnectionString* shardHost ) const {

        // Declare up here for parsing later
        string errMsg;

        // Special-case for config and admin
        if ( shardName == "config" || shardName == "admin" ) {
            *shardHost = ConnectionString::parse( configServer.modelServer(), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }

        //
        // First get the information about the shard from the shard cache
        //

        // Internally uses our shard cache, does no reload
        Shard shard = Shard::findIfExists( shardName );
        if ( shard.getName() == "" ) {
            return Status( ErrorCodes::ShardNotFound,
                           string("unknown shard name ") + shardName );
        }

        ConnectionString rawShardHost = ConnectionString::parse( shard.getConnString(), errMsg );
        dassert( errMsg == "" );
        dassert( rawShardHost.type() == ConnectionString::SET
                 || rawShardHost.type() == ConnectionString::MASTER );

        if ( rawShardHost.type() == ConnectionString::MASTER ) {
            *shardHost = rawShardHost;
            return Status::OK();
        }

        //
        // If we need to, then get the particular node we're targeting in the replica set
        //

        // Does not reload the monitor if it doesn't currently exist
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawShardHost.getSetName(),
                                                                   false );
        if ( !replMonitor ) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawShardHost.getSetName() );
        }

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMaster();
            *shardHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        }
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );
        }

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
    }
Example #18
0
    bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
        // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
        string nameInternal;
        if ( ! name ) {
            name = &nameInternal;
        }

        ReplicaSetMonitorPtr rsMonitor;

        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two set of sanity checks: making sure adding this particular shard is consistent
        // with the replica set state (if it exists) and making sure this shards databases can be
        // brought into the grid without conflict.

        if ( servers.type() == ConnectionString::SYNC ) {
            errMsg = "can't use sync cluster as a shard for replica set, "
                    "have to use <setname>/<server1>,<server2>,...";
            return false;
        }

        vector<string> dbNames;
        try {
            bool ok = false;

            {
                ScopedDbConnection newShardConn(servers.toString());

                BSONObj resIsMongos;
                ok = newShardConn->runCommand( "admin", BSON( "isdbgrid" << 1 ), resIsMongos );
                newShardConn.done();
            }

            // should return ok=0, cmd not found if it's a normal mongod
            if ( ok ) {
                errMsg = "can't add a mongos process as a shard";
                return false;
            }

            if ( servers.type() == ConnectionString::SET ) {
                if (!addReplSetShardCheck( servers, &errMsg )) {
                    return false;
                }

                // shard name defaults to the name of the replica set
                if ( name->empty() && !servers.getSetName().empty() ) {
                    *name = servers.getSetName();
                }
            }

            // In order to be accepted as a new shard, that mongod must not have any database name
            // that exists already in any other shards. If that test passes, the new shard's
            // databases are going to be entered as non-sharded db's whose primary is the
            // newly added shard.

            BSONObj resListDB;
            {
                ScopedDbConnection newShardConn(servers.toString());
                ok = newShardConn->runCommand( "admin", BSON( "listDatabases" << 1 ), resListDB );
                newShardConn.done();
            }

            if ( !ok ) {
                errMsg = str::stream() << "failed listing " << servers.toString()
                            << "'s databases:" << resListDB;;
                return false;
            }

            BSONObjIterator i( resListDB["databases"].Obj() );
            while ( i.more() ) {
                BSONObj dbEntry = i.next().Obj();
                const string& dbName = dbEntry["name"].String();
                if ( _isSpecialLocalDB( dbName ) ) {
                    // 'local', 'admin', and 'config' are system DBs and should be excluded here
                    continue;
                }
                else {
                    dbNames.push_back( dbName );
                }
            }

            if ( servers.type() == ConnectionString::SET ) {
                rsMonitor = ReplicaSetMonitor::get( servers.getSetName() );
            }
        }
        catch ( DBException& e ) {
            if ( servers.type() == ConnectionString::SET ) {
                ReplicaSetMonitor::remove( servers.getSetName() );
            }

            errMsg = str::stream() << "couldn't connect to new shard " << causedBy(e);
            return false;
        }

        // check that none of the existing shard candidate's db's exist elsewhere
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , false );
            if ( config.get() != NULL ) {
                ostringstream ss;
                ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
                ss << "' exists in another " << config->getPrimary().toString();
                errMsg = ss.str();
                return false;
            }
        }

        // if a name for a shard wasn't provided, pick one.
        if ( name->empty() && ! _getNewShardName( name ) ) {
            errMsg = "error generating new shard name";
            return false;
        }

        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append(ShardType::name(), *name);
        b.append(ShardType::host(),
                 rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
        if (maxSize > 0) {
            b.append(ShardType::maxSize(), maxSize);
        }
        BSONObj shardDoc = b.obj();

        {
            ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

            // check whether the set of hosts (or single host) is not an already a known shard
            BSONObj old = conn->findOne(ShardType::ConfigNS,
                                        BSON(ShardType::host(servers.toString())));

            if ( ! old.isEmpty() ) {
                errMsg = "host already used";
                conn.done();
                return false;
            }

            log() << "going to add shard: " << shardDoc << endl;

            conn->insert(ShardType::ConfigNS , shardDoc);
            errMsg = conn->getLastError();
            if ( ! errMsg.empty() ) {
                log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
                conn.done();
                return false;
            }

            conn.done();
        }

        Shard::reloadShardInfo();

        // add all databases of the new shard
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , true , *name );
            if ( ! config ) {
                log() << "adding shard " << servers << " even though could not add database " << *it << endl;
            }
        }

        // Record in changelog
        BSONObjBuilder shardDetails;
        shardDetails.append("name", *name);
        shardDetails.append("host", servers.toString());
        configServer.logChange("addShard", "", shardDetails.obj());

        return true;
    }