Example #1
    // Verify that no two shards share the same ObjectId "machine" id;
    // duplicates would let ObjectIds generated on different shards collide.
    bool Balancer::_checkOIDs() {
        vector<Shard> all;
        Shard::getAllShards( all );

        map<int,Shard> oids;

        for ( vector<Shard>::iterator i=all.begin(); i!=all.end(); ++i ) {
            Shard s = *i;
            BSONObj f = s.runCommand( "admin" , "features" );
            if ( f["oidMachine"].isNumber() ) {
                int x = f["oidMachine"].numberInt();
                if ( oids.count(x) == 0 ) {
                    oids[x] = s;
                }
                else {
                    log() << "error: 2 machines have " << x << " as oid machine piece " << s.toString() << " and " << oids[x].toString() << endl;
                    s.runCommand( "admin" , BSON( "features" << 1 << "oidReset" << 1 ) );
                    oids[x].runCommand( "admin" , BSON( "features" << 1 << "oidReset" << 1 ) );
                    return false;
                }
            }
            else {
                log() << "warning: oidMachine not set on: " << s.toString() << endl;
            }
        }
        return true;
    }
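As a usage sketch (not from the source): a balancing round could be gated on _checkOIDs, which already resets clashing machine ids itself. The method name below is an assumption.

    // Hypothetical caller; Balancer::_doBalanceRound is an assumed name.
    void Balancer::_doBalanceRound() {
        if ( ! _checkOIDs() ) {
            // duplicate oidMachine values were found and reset above;
            // skip this round rather than balance with suspect ObjectIds
            log() << "skipping balancing round because of duplicate oid machines" << endl;
            return;
        }
        // ... select and move chunks as usual ...
    }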
Example #2
    Shard * Shard::split( const BSONObj& m ){
        uassert( "can't split as shard that doesn't have a manager" , _manager );
        
        log(1) << " before split on: "  << m << "\n"
               << "\t self  : " << toString() << endl;

        uassert( "locking namespace on server failed" , lockNamespaceOnServer( getServer() , _ns ) );

        // the new shard takes the right half [ m , _max ); this shard keeps [ _min , m )
        Shard * s = new Shard( _manager );
        s->_ns = _ns;
        s->_server = _server;
        s->_min = m.getOwned();
        s->_max = _max;
        
        s->_markModified();
        _markModified();
        
        _manager->_shards.push_back( s );
        
        _max = m.getOwned(); 
        
        log(1) << " after split:\n" 
               << "\t left : " << toString() << "\n" 
               << "\t right: "<< s->toString() << endl;
        
        
        _manager->save();
        
        return s;
    }
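A hedged sketch of a call site for split(), assuming a shard pointer and a precomputed split key (both names are placeholders):

    // Illustrative helper; 'current' and 'splitPoint' are assumptions.
    // After the call, 'current' covers [ _min , splitPoint ) and the
    // returned shard covers [ splitPoint , old _max ).
    void splitShardAt( Shard * current , const BSONObj& splitPoint ) {
        Shard * right = current->split( splitPoint );
        log(1) << "left : " << current->toString() << "\n"
               << "right: " << right->toString() << endl;
    }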
Example #3
    ShardManager::ShardManager( DBConfig * config , string ns , ShardKeyPattern pattern ) : _config( config ) , _ns( ns ) , _key( pattern ){
        // load all persisted shard entries for this namespace from the config server
        Shard temp(0);
        
        ScopedDbConnection conn( temp.modelServer() );
        auto_ptr<DBClientCursor> cursor = conn->query( temp.getNS() , BSON( "ns" <<  ns ) );
        while ( cursor->more() ){
            Shard * s = new Shard( this );
            BSONObj d = cursor->next();
            s->unserialize( d );
            _shards.push_back( s );
            s->_id = d["_id"].wrap().getOwned();
        }
        conn.done();
        
        // first use of this namespace: seed a single shard spanning the whole key range
        if ( _shards.size() == 0 ){
            Shard * s = new Shard( this );
            s->_ns = ns;
            s->_min = _key.globalMin();
            s->_max = _key.globalMax();
            s->_server = config->getPrimary();
            s->_markModified();
            
            _shards.push_back( s );
            
            log() << "no shards for: " << ns << " so creating first: " << s->toString() << endl;
        }
        }

        _sequenceNumber = ++NextSequenceNumber;
    }
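A minimal construction sketch; the DBConfig pointer, namespace, and _id key pattern below are illustrative assumptions, not taken from the source:

    // Illustrative; 'config' is an assumed DBConfig*. Sharding on _id is
    // only an example key pattern.
    ShardManager * loadManager( DBConfig * config ) {
        ShardKeyPattern key( BSON( "_id" << 1 ) );
        return new ShardManager( config , "test.users" , key );
    }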
Example #4
    bool Shard::moveIfShould( Shard * newShard ){
        Shard * toMove = 0;

        // only move a side that is (nearly) empty; there is no heuristic
        // here for relocating a populated shard
        if ( newShard->countObjects() <= 1 ){
            toMove = newShard;
        }
        else if ( this->countObjects() <= 1 ){
            toMove = this;
        }
        else {
            log(1) << "don't know how to decide if i should move inner shard" << endl;
        }

        if ( ! toMove )
            return false;
        
        string newLocation = grid.pickServerForNewDB();
        if ( newLocation == getServer() ){
            // if this is the best server, then we shouldn't do anything!
            log(1) << "not moving shard: " << toString() << " b/c would move to same place  " << newLocation << " -> " << getServer() << endl;
            return false;
        }

        log() << "moving shard (auto): " << toMove->toString() << " to: " << newLocation << " #objcets: " << toMove->countObjects() << endl;

        string errmsg;
        bool worked = toMove->moveAndCommit( newLocation , errmsg );
        massert( (string)"moveAndCommit failed: " + errmsg , worked );
        
        return true;
    }
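A hedged sketch pairing split() with moveIfShould(), since the countObjects() <= 1 checks above imply the freshly split, near-empty side is the move candidate. All names below are placeholders:

    // Illustrative; 'current' and 'splitPoint' are assumptions.
    void splitAndMaybeMove( Shard * current , const BSONObj& splitPoint ) {
        Shard * right = current->split( splitPoint );
        if ( current->moveIfShould( right ) ) {
            // one (nearly empty) side was migrated to a server picked by grid
            log() << "post-split migration done" << endl;
        }
    }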
Example #5
    bool Chunk::moveAndCommit(const Shard& to,
                              long long chunkSize /* bytes */,
                              const WriteConcernOptions* writeConcern,
                              bool waitForDelete,
                              int maxTimeMS,
                              BSONObj& res) const {
        uassert( 10167 ,  "can't move shard to its current location!" , getShard() != to );

        log() << "moving chunk ns: " << _manager->getns() << " moving ( " << toString() << ") "
              << _shard.toString() << " -> " << to.toString();

        Shard from = _shard;
        ScopedDbConnection fromconn(from.getConnString());

        BSONObjBuilder builder;
        builder.append("moveChunk", _manager->getns());
        builder.append("from", from.getAddress().toString());
        builder.append("to", to.getAddress().toString());
        // NEEDED FOR 2.0 COMPATIBILITY
        builder.append("fromShard", from.getName());
        builder.append("toShard", to.getName());
        ///////////////////////////////
        builder.append("min", _min);
        builder.append("max", _max);
        builder.append("maxChunkSizeBytes", chunkSize);
        builder.append("shardId", genID());
        builder.append("configdb", configServer.modelServer());

        // For legacy secondary throttle setting.
        bool secondaryThrottle = true;
        if (writeConcern &&
                writeConcern->wNumNodes <= 1 &&
                writeConcern->wMode.empty()) {
            secondaryThrottle = false;
        }

        builder.append("secondaryThrottle", secondaryThrottle);

        if (secondaryThrottle && writeConcern) {
            builder.append("writeConcern", writeConcern->toBSON());
        }

        builder.append("waitForDelete", waitForDelete);
        builder.append(LiteParsedQuery::cmdOptionMaxTimeMS, maxTimeMS);
        builder.append("epoch", _manager->getVersion().epoch());

        bool worked = fromconn->runCommand("admin", builder.done(), res);
        fromconn.done();

        LOG( worked ? 1 : 0 ) << "moveChunk result: " << res;

        // if it succeeded, we need to reload to pick up the new location;
        // if it failed, mongos may be stale.
        // A reload is excessive here, as the failure could simply mean the
        // collection metadata lock was taken.
        _manager->reload();

        return worked;
    }
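    // A caller sketch for moveAndCommit; 'chunk' and 'to' are assumptions.
    // Per the legacy rule above, a null WriteConcernOptions leaves
    // secondaryThrottle at its default of true.
    bool migrate( const Chunk& chunk , const Shard& to ) {
        BSONObj res;
        bool ok = chunk.moveAndCommit( to ,
                                       64 * 1024 * 1024 ,  // example max chunk size in bytes
                                       NULL ,              // writeConcern
                                       false ,             // waitForDelete
                                       0 ,                 // maxTimeMS
                                       res );
        if ( ! ok )
            log() << "moveChunk failed: " << res;
        return ok;
    }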
    /**
     * Updates the remote cached version on the remote shard host (primary, in the case of replica
     * sets) if needed with a fully-qualified shard version for the given namespace:
     *   config server(s) + shard name + shard version
     *
     * If no remote cached version has ever been set, an initial shard version is sent.
     *
     * If the namespace is empty and no version has ever been sent, the config server + shard name
     * is sent to the remote shard host to initialize the connection as coming from mongos.
     * NOTE: This initialization is *best-effort only*.  Operations which wish to correctly version
     * must send the namespace.
     *
     * Config servers are special and are not (unless they are also shards) kept up to date with
     * this protocol.  This is safe so long as config servers only contain unversioned collections.
     *
     * It is an error to call checkShardVersion with an unversionable connection (isVersionableCB).
     *
     * @return true if we contacted the remote host
     */
    bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager, bool authoritative , int tryNumber ) {
        // TODO: cache, optimize, etc...

        // Empty namespaces are special - we require initialization but not versioning
        if (ns.size() == 0) {
            return initShardVersionEmptyNS(conn_in);
        }

        DBConfigPtr conf = grid.getDBConfig( ns );
        if ( ! conf )
            return false;

        DBClientBase* conn = getVersionable( conn_in );
        verify(conn); // errors thrown above

        unsigned long long officialSequenceNumber = 0;

        ShardPtr primary;
        ChunkManagerPtr manager;
        if (authoritative)
            conf->getChunkManagerIfExists(ns, true); // force a reload of the chunk manager

        conf->getChunkManagerOrPrimary(ns, manager, primary);

        if (manager)
            officialSequenceNumber = manager->getSequenceNumber();

        // Check this manager against the reference manager
        if( manager ){

            Shard shard = Shard::make( conn->getServerAddress() );
            if (refManager && !refManager->compatibleWith(*manager, shard.getName())) {
                const ChunkVersion refVersion(refManager->getVersion(shard.getName()));
                const ChunkVersion currentVersion(manager->getVersion(shard.getName()));
                string msg(str::stream() << "manager ("
                        << currentVersion.toString()
                        << " : " << manager->getSequenceNumber() << ") "
                        << "not compatible with reference manager ("
                        << refVersion.toString()
                        << " : " << refManager->getSequenceNumber() << ") "
                        << "on shard " << shard.getName()
                        << " (" << shard.getAddress().toString() << ")");

                throw SendStaleConfigException(ns,
                                               msg,
                                               refVersion,
                                               currentVersion);
            }
        }
        else if( refManager ){

            Shard shard = Shard::make(conn->getServerAddress());
            string msg( str::stream() << "not sharded ("
                        << ( (manager.get() == 0) ? string( "<none>" ) :
                                str::stream() << manager->getSequenceNumber() )
                        << ") but has reference manager ("
                        << refManager->getSequenceNumber() << ") "
                        << "on conn " << conn->getServerAddress() << " ("
                        << conn_in->getServerAddress() << ")" );

            throw SendStaleConfigException(ns,
                                           msg,
                                           refManager->getVersion(shard.getName()),
                                           ChunkVersion::UNSHARDED());
        }

        // Do not send setShardVersion to collections on the config servers - this causes problems
        // when config servers are also shards and get SSV with conflicting names.
        // TODO: Make config servers regular shards
        if (primary && primary->getName() == "config") {
            return false;
        }

        // Has the ChunkManager been reloaded since the last time we updated the shard version over
        // this connection?  If we've never updated the shard version, do so now.
        unsigned long long sequenceNumber = 0;
        if (connectionShardStatus.getSequence(conn, ns, &sequenceNumber)) {
            if (sequenceNumber == officialSequenceNumber) {
                return false;
            }
        }

        // Now that we're sure we're sending SSV and not to a single config server, get the shard
        Shard shard = Shard::make(conn->getServerAddress());

        ChunkVersion version = ChunkVersion(0, 0, OID());
        if (manager)
            version = manager->getVersion(shard.getName());

        LOG(1) << "setting shard version of " << version << " for " << ns << " on shard "
               << shard.toString();

        LOG(3) << "last version sent with chunk manager iteration " << sequenceNumber
               << ", current chunk manager iteration is " << officialSequenceNumber;

        BSONObj result;
        if (setShardVersion(*conn,
                            ns,
                            configServer.modelServer(),
                            version,
                            manager.get(),
                            authoritative,
                            result)) {

            LOG(1) << "      setShardVersion success: " << result;
            connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
            return true;
        }

        LOG(1) << "       setShardVersion failed!\n" << result << endl;

        if ( result["need_authoritative"].trueValue() )
            massert( 10428 ,  "need_authoritative set but in authoritative mode already" , ! authoritative );

        if ( ! authoritative ) {
            // use the original connection and get a fresh versionable connection
            // since conn can be invalidated (or worse, freed) after the failure
            checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
            return true;
        }
        
        if ( result["reloadConfig"].trueValue() ) {
            if( result["version"].timestampTime() == 0 ){

                warning() << "reloading full configuration for " << conf->name()
                          << ", connection state indicates significant version changes";

                // reload db
                conf->reload();
            }
            else {
                // reload config
                conf->getChunkManager( ns , true );
            }
        }

        const int maxNumTries = 7;
        if ( tryNumber < maxNumTries ) {
            LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 ) 
                << "going to retry checkShardVersion shard: " << shard.toString() << " " << result;
            sleepmillis( 10 * tryNumber );
            // use the original connection and get a fresh versionable connection
            // since conn can be invalidated (or worse, freed) after the failure
            checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
            return true;
        }
        
        string errmsg = str::stream() << "setShardVersion failed shard: " << shard.toString()
                                      << " " << result;
        log() << "     " << errmsg << endl;
        massert( 10429 , errmsg , 0 );
        return true;
    }
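    // A hedged wrapper sketch for the common entry into checkShardVersion:
    // no reference manager, non-authoritative, tryNumber starting at 1.
    // The helper name is hypothetical; retries and config reloads happen
    // inside checkShardVersion itself, as shown above.
    void ensureVersioned( DBClientBase * conn , const string& ns ) {
        checkShardVersion( conn , ns , ChunkManagerPtr() , false , 1 );
    }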
    /**
     * Special internal logic to run reduced version handshake for empty namespace operations to
     * shards.
     *
     * Eventually this should go completely away, but for now many commands rely on unversioned but
     * mongos-specific behavior on mongod (auditing and replication information in commands)
     */
    static bool initShardVersionEmptyNS(DBClientBase * conn_in) {

        bool ok;
        BSONObj result;
        DBClientBase* conn = NULL;
        try {
            // May throw if replica set primary is down
            conn = getVersionable( conn_in );
            dassert( conn ); // errors thrown above

            // Check to see if we've already initialized this connection
            if (connectionShardStatus.hasAnySequenceSet(conn))
                return false;

            // Check to see if this is actually a shard and not a single config server
            // NOTE: Config servers are registered only by the name "config" in the shard cache, not
            // by host, so lookup by host will fail unless the host is also a shard.
            Shard shard = Shard::findIfExists(conn->getServerAddress());
            if (!shard.ok())
                return false;

            LOG(1) << "initializing shard connection to " << shard.toString() << endl;

            ok = setShardVersion(*conn,
                                 "",
                                 configServer.modelServer(),
                                 ChunkVersion(),
                                 NULL,
                                 true,
                                 result);
        }
        catch( const DBException& ) {

            // NOTE: Replica sets may fail to initShardVersion because future calls relying on
            // correct versioning must later call checkShardVersion on the primary.
            // Secondary queries and commands may not call checkShardVersion, but secondary ops
            // aren't versioned at all.
            if ( conn_in->type() != ConnectionString::SET ) {
                throw;
            }

            // NOTE: Only old-style cluster operations will talk via DBClientReplicaSets - using
            // checkShardVersion is required (which includes initShardVersion information) if these
            // connections are used.

            OCCASIONALLY {
                warning() << "failed to initialize new replica set connection version, "
                          << "will initialize on first use" << endl;
            }

            return false;
        }

        // Record the connection wire version if sent in the response, initShardVersion is a
        // handshake for mongos->mongod connections.
        if ( !result["minWireVersion"].eoo() ) {

            int minWireVersion = result["minWireVersion"].numberInt();
            int maxWireVersion = result["maxWireVersion"].numberInt();
            conn->setWireVersions( minWireVersion, maxWireVersion );
        }

        LOG(3) << "initial sharding result : " << result << endl;

        connectionShardStatus.setSequence(conn, "", 0);
        return ok;
    }
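Per the doc comment above, the empty-namespace handshake is reached by calling checkShardVersion with an empty namespace; a minimal sketch with an assumed connection:

    // Illustrative; 'conn' is an assumed connection. An empty ns routes
    // checkShardVersion into the best-effort handshake above.
    void initHandshake( DBClientBase * conn ) {
        checkShardVersion( conn , "" , ChunkManagerPtr() , false , 1 );
    }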