Example #1
        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string ns = cmdObj["getShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify full namespace";
                return false;
            }

            result.append( "configServer" , shardingState.getConfigServer() );

            result.appendTimestamp( "global" , shardingState.getVersion(ns).toLong() );

            ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );
            result.appendBool( "inShardedMode" , info != 0 );
            if ( info )
                result.appendTimestamp( "mine" , info->getVersion(ns).toLong() );
            else
                result.appendTimestamp( "mine" , 0 );

            if ( cmdObj["fullMetadata"].trueValue() ) {
                CollectionMetadataPtr metadata = shardingState.getCollectionMetadata( ns );
                if ( metadata ) result.append( "metadata", metadata->toBSON() );
                else result.append( "metadata", BSONObj() );
            }

            return true;
        }
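
The handler above converts the chunk version with toLong() before reporting it through appendTimestamp, which suggests the version travels as a single 64-bit value. A minimal standalone sketch of that idea (not the real ShardChunkVersion type), assuming the major component sits in the high 32 bits and the minor component in the low 32 bits:

// Standalone sketch (not MongoDB code): assumes the chunk version packs the
// "major" component into the high 32 bits and the "minor" counter into the
// low 32 bits of one 64-bit value, which is what would let it be appended
// as a single timestamp-like field.
#include <cstdint>
#include <iostream>

struct SketchChunkVersion {
    uint32_t major;
    uint32_t minor;

    // pack both components into one 64-bit value (the assumed toLong() layout)
    uint64_t toLong() const {
        return (static_cast<uint64_t>(major) << 32) | minor;
    }

    static SketchChunkVersion fromLong(uint64_t v) {
        SketchChunkVersion out;
        out.major = static_cast<uint32_t>(v >> 32);
        out.minor = static_cast<uint32_t>(v & 0xffffffffu);
        return out;
    }
};

int main() {
    SketchChunkVersion v = { 3, 7 };                          // version 3|7
    uint64_t packed = v.toLong();                             // what "global"/"mine" would carry
    SketchChunkVersion back = SketchChunkVersion::fromLong(packed);
    std::cout << back.major << "|" << back.minor << "\n";     // prints 3|7
    return 0;
}
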
Example #2
    bool checkConfigOrInit( const string& configdb , bool authoritative , string& errmsg , BSONObjBuilder& result , bool locked=false ) const {
        if ( configdb.size() == 0 ) {
            errmsg = "no configdb";
            return false;
        }

        if ( shardingState.enabled() ) {
            if ( configdb == shardingState.getConfigServer() )
                return true;

            result.append( "configdb" , BSON( "stored" << shardingState.getConfigServer() <<
                                              "given" << configdb ) );

            errmsg = str::stream() << "mongos specified a different config database string : "
                     << "stored : " << shardingState.getConfigServer()
                     << " vs given : " << configdb;
            return false;
        }

        if ( ! authoritative ) {
            result.appendBool( "need_authoritative" , true );
            errmsg = "first setShardVersion";
            return false;
        }

        if ( locked ) {
            ShardedConnectionInfo::addHook();
            shardingState.enable( configdb );
            configServer.init( configdb );
            return true;
        }

        Lock::GlobalWrite lk;
        return checkConfigOrInit( configdb , authoritative , errmsg , result , true );
    }
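
checkConfigOrInit only does its work once the global write lock is held: the unlocked entry point acquires the lock and calls itself again with locked=true. A minimal standalone sketch of that lock-then-recurse shape using std::mutex; the names (checkOrInit, globalLock, storedConfigServer) are stand-ins, not MongoDB types:

// Standalone sketch (not MongoDB code) of the lock-then-recurse shape used by
// checkConfigOrInit: the unlocked entry point takes the lock and re-enters
// with locked=true so the real work is written only once.
#include <mutex>
#include <string>
#include <iostream>

static std::mutex globalLock;            // stand-in for Lock::GlobalWrite / dblock
static std::string storedConfigServer;   // stand-in for the sharding state's config string

bool checkOrInit(const std::string& configdb, bool locked = false) {
    if (configdb.empty())
        return false;                                   // "no configdb"

    if (!locked) {
        std::lock_guard<std::mutex> lk(globalLock);     // take the "global write lock"
        return checkOrInit(configdb, true);             // re-enter with locked == true
    }

    if (storedConfigServer.empty()) {
        storedConfigServer = configdb;                  // first caller initializes
        return true;
    }
    return storedConfigServer == configdb;              // later callers must match
}

int main() {
    std::cout << checkOrInit("cfg1,cfg2,cfg3") << "\n"; // 1: initialized
    std::cout << checkOrInit("cfg1,cfg2,cfg3") << "\n"; // 1: matches stored value
    std::cout << checkOrInit("other") << "\n";          // 0: mismatch
    return 0;
}
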
Example #3
        bool checkConfigOrInit( const string& configdb , bool authoritative , string& errmsg , BSONObjBuilder& result , bool locked=false ) const {
            if ( configdb.size() == 0 ) {
                errmsg = "no configdb";
                return false;
            }
            
            if ( shardingState.enabled() ) {
                if ( configdb == shardingState.getConfigServer() ) 
                    return true;
                
                result.append( "configdb" , BSON( "stored" << shardingState.getConfigServer() << 
                                                  "given" << configdb ) );
                errmsg = "specified a different configdb!";
                return false;
            }
            
            if ( ! authoritative ) {
                result.appendBool( "need_authoritative" , true );
                errmsg = "first setShardVersion";
                return false;
            }
            
            if ( locked ) {
                shardingState.enable( configdb );
                configServer.init( configdb );
                return true;
            }

            dblock lk;
            return checkConfigOrInit( configdb , authoritative , errmsg , result , true );
        }
Example #4
void logDeleteOpForSharding(OperationContext* txn,
                            const char* ns,
                            const BSONObj& obj,
                            bool notInActiveChunk) {
    ShardingState* shardingState = ShardingState::get(txn);
    if (shardingState->enabled())
        shardingState->migrationSourceManager()->logDeleteOp(txn, ns, obj, notInActiveChunk);
}
Example #5
    /**
     * @return true if not in sharded mode
     *         or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , bool isWriteOp , string& errmsg ) {
        if ( ! shardingState.enabled() )
            return true;

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ) {
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which I think is the correct behavior
            return true;
        }

        if ( info->inForceVersionOkMode() ) {
            return true;
        }

        // TODO
        //   all collections, sharded or not, will at some point have a version (and a ShardChunkManager)
        //   for now, we remove the sharding state of dropped collections,
        //   so delayed requests may come in. This has to be fixed.
        ConfigVersion clientVersion = info->getVersion(ns);
        ConfigVersion version;
        if ( ! shardingState.hasVersion( ns , version ) && clientVersion == 0 ) {
            return true;
        }


        if ( version == 0 && clientVersion > 0 ) {
            stringstream ss;
            ss << "collection was dropped or this shard no longer valied version: " << version << " clientVersion: " << clientVersion;
            errmsg = ss.str();
            return false;
        }

        if ( clientVersion >= version )
            return true;


        if ( clientVersion == 0 ) {
            stringstream ss;
            ss << "client in sharded mode, but doesn't have version set for this collection: " << ns << " myVersion: " << version;
            errmsg = ss.str();
            return false;
        }

        if ( version.majorVersion() == clientVersion.majorVersion() ) {
            // this means there was just a split
            // since on a split w/o a migrate this server is ok
            // going to accept the operation
            return true;
        }

        stringstream ss;
        ss << "your version is too old  ns: " + ns << " global: " << version << " client: " << clientVersion;
        errmsg = ss.str();
        return false;
    }
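
The ladder above compares the connection's version with the shard's global version and still accepts the request when only the minor component differs (a split without a migrate). A standalone sketch of the same decision order, using plain major/minor pairs instead of the real ConfigVersion type; names are illustrative:

// Standalone sketch (not MongoDB code) of the shardVersionOk decision ladder,
// reduced to plain major/minor pairs.
#include <cstdint>
#include <string>
#include <iostream>

struct Ver { uint32_t major; uint32_t minor; };

static bool geq(Ver a, Ver b) {
    return a.major != b.major ? a.major > b.major : a.minor >= b.minor;
}
static bool isZero(Ver v) { return v.major == 0 && v.minor == 0; }

// returns true if the client's version is acceptable, otherwise fills errmsg
bool versionOk(Ver global, Ver client, std::string& errmsg) {
    if (isZero(global) && isZero(client))
        return true;                                    // nothing tracked on either side
    if (isZero(global)) {
        errmsg = "collection dropped or shard no longer valid";
        return false;
    }
    if (geq(client, global))
        return true;                                    // client is current (or ahead)
    if (isZero(client)) {
        errmsg = "client in sharded mode but has no version for this collection";
        return false;
    }
    if (client.major == global.major)
        return true;                                    // only a split happened; accept
    errmsg = "client version is too old";
    return false;
}

int main() {
    std::string err;
    std::cout << versionOk({4, 9}, {4, 2}, err) << "\n";   // 1: same major, split only
    std::cout << versionOk({5, 0}, {4, 2}, err) << "\n";   // 0: a migrate bumped the major version
    std::cout << err << "\n";
    return 0;
}
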
Example #6
    bool haveLocalShardingInfo( const string& ns ) {
        if ( ! shardingState.enabled() )
            return false;

        if ( ! shardingState.hasVersion( ns ) )
            return false;

        return ShardedConnectionInfo::get(false) != 0;
    }
Example #7
    /**
     * @return true if not in sharded mode
     *         or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , bool isWriteOp , string& errmsg ){
        if ( ! shardingState.enabled() )
            return true;

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ){
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which I think is the correct behavior
            return true;
        }
        
        if ( info->inForceVersionOkMode() ){
            return true;
        }

        ConfigVersion version;    
        if ( ! shardingState.hasVersion( ns , version ) ){
            return true;
        }

        ConfigVersion clientVersion = info->getVersion(ns);

        if ( version == 0 && clientVersion > 0 ){
            stringstream ss;
            ss << "collection was dropped or this shard no longer valied version: " << version << " clientVersion: " << clientVersion;
            errmsg = ss.str();
            return false;
        }
        
        if ( clientVersion >= version )
            return true;
        

        if ( clientVersion == 0 ){
            stringstream ss;
            ss << "client in sharded mode, but doesn't have version set for this collection: " << ns << " myVersion: " << version;
            errmsg = ss.str();
            return false;
        }

        if ( isWriteOp && version.majorVersion() == clientVersion.majorVersion() ){
            // this means there was just a split 
            // since on a split w/o a migrate this server is ok
            // going to accept write
            return true;
        }

        stringstream ss;
        ss << "your version is too old  ns: " + ns << " global: " << version << " client: " << clientVersion;
        errmsg = ss.str();
        return false;
    }
Example #8
        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
            string ns = cmdObj["getShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to speciy fully namespace";
                return false;
            }

            result.append( "configServer" , shardingState.getConfigServer() );

            result.appendTimestamp( "global" , shardingState.getVersion(ns) );

            ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );
            if ( info )
                result.appendTimestamp( "mine" , info->getVersion(ns) );
            else
                result.appendTimestamp( "mine" , 0 );

            return true;
        }
Example #9
        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){

            // Debugging code for SERVER-1633. Commands already have a coarser timer for
            // normal operation.
            Timer timer;
            vector<int> laps;

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            bool authoritative = cmdObj.getBoolField( "authoritative" );

            string configdb = cmdObj["configdb"].valuestrsafe();
            { // configdb checking
                if ( configdb.size() == 0 ){
                    errmsg = "no configdb";
                    return false;
                }
                
                if ( shardingState.enabled() ){
                    if ( configdb != shardingState.getConfigServer() ){
                        errmsg = "specified a different configdb!";
                        return false;
                    }
                }
                else {
                    if ( ! authoritative ){
                        result.appendBool( "need_authoritative" , true );
                        errmsg = "first setShardVersion";
                        return false;
                    }
                    shardingState.enable( configdb );
                    configServer.init( configdb );
                }
            }

            // SERVER-1633
            laps.push_back( timer.millis() );
            
            if ( cmdObj["shard"].type() == String ){
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }

            { // setting up ids
                if ( cmdObj["serverID"].type() != jstOID ){
                    // TODO: fix this
                    //errmsg = "need serverID to be an OID";
                    //return 0;
                }
                else {
                    OID clientId = cmdObj["serverID"].__oid();
                    if ( ! info->hasID() ){
                        info->setID( clientId );
                    }
                    else if ( clientId != info->getID() ){
                        errmsg = "server id has changed!";
                        return 0;
                    }
                }
            }

            // SERVER-1633
            laps.push_back( timer.millis() );
            
            unsigned long long version = extractVersion( cmdObj["version"] , errmsg );

            if ( errmsg.size() ){
                return false;
            }
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ){
                errmsg = "need to speciy fully namespace";
                return false;
            }
            
            ConfigVersion& oldVersion = info->getVersion(ns);
            ConfigVersion& globalVersion = shardingState.getVersion(ns);
            
            if ( oldVersion > 0 && globalVersion == 0 ){
                // this had been reset
                oldVersion = 0;
            }

            if ( version == 0 && globalVersion == 0 ){
                // this connection is cleaning itself
                oldVersion = 0;
                return 1;
            }

            // SERVER-1633
            laps.push_back( timer.millis() );

            if ( version == 0 && globalVersion > 0 ){
                if ( ! authoritative ){
                    result.appendBool( "need_authoritative" , true );
                    result.appendTimestamp( "globalVersion" , globalVersion );
                    result.appendTimestamp( "oldVersion" , oldVersion );
                    errmsg = "dropping needs to be authoritative";
                    return 0;
                }
                log() << "wiping data for: " << ns << endl;
                result.appendTimestamp( "beforeDrop" , globalVersion );
                // only setting global version on purpose
                // need clients to re-find meta-data
                globalVersion = 0;
                oldVersion = 0;
                return 1;
            }

            if ( version < oldVersion ){
                errmsg = "you already have a newer version";
                result.appendTimestamp( "oldVersion" , oldVersion );
                result.appendTimestamp( "newVersion" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }
            
            // SERVER-1633
            laps.push_back( timer.millis() );

            if ( version < globalVersion ){
                while ( shardingState.inCriticalMigrateSection() ){
                    dbtemprelease r;
                    sleepmillis(2);
                    log() << "waiting till out of critical section" << endl;
                }
                errmsg = "going to older version for global";
                result.appendTimestamp( "version" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }
            
            if ( globalVersion == 0 && ! cmdObj.getBoolField( "authoritative" ) ){
                // need authoritative for first look
                result.appendBool( "need_authoritative" , true );
                result.append( "ns" , ns );
                errmsg = "first time for this ns";
                return false;
            }

            // SERVER-1633
            laps.push_back( timer.millis() );

            {
                dbtemprelease unlock;
                shardingState.getChunkMatcher( ns );
            }

            result.appendTimestamp( "oldVersion" , oldVersion );
            oldVersion = version;
            globalVersion = version;

            // SERVER-1633
            ostringstream lapString;
            lapString << name /* command name */ << " partials: " ;
            for (size_t i = 1; i<laps.size(); ++i){ 
                lapString << (laps[i] - laps[i-1]) / 1000 << " ";
            }
            lapString << endl;
            logIfSlow( timer, lapString.str() );

            result.append( "ok" , 1 );
            return 1;
        }
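
The SERVER-1633 instrumentation above records a lap after each phase and later logs the deltas between laps. A standalone sketch of the same lap-timing idea with std::chrono; the phase names are illustrative only:

// Standalone sketch (not MongoDB code) of the lap timing used above for
// SERVER-1633: push a timestamp after each phase, then report the deltas.
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

int main() {
    using clock = std::chrono::steady_clock;
    std::vector<clock::time_point> laps;
    laps.push_back(clock::now());                                  // start

    std::this_thread::sleep_for(std::chrono::milliseconds(5));     // "configdb checking" phase
    laps.push_back(clock::now());

    std::this_thread::sleep_for(std::chrono::milliseconds(3));     // "setting up ids" phase
    laps.push_back(clock::now());

    std::cout << "partials:";
    for (size_t i = 1; i < laps.size(); ++i) {
        auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(laps[i] - laps[i - 1]).count();
        std::cout << " " << ms << "ms";
    }
    std::cout << "\n";
    return 0;
}
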
Example #10
 bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
     shardingState.appendInfo( result );
     return true;
 }
Example #11
        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            bool authoritative = cmdObj.getBoolField( "authoritative" );

            string configdb = cmdObj["configdb"].valuestrsafe();
            {
                // configdb checking
                if ( configdb.size() == 0 ) {
                    errmsg = "no configdb";
                    return false;
                }

                if ( shardingState.enabled() ) {
                    if ( configdb != shardingState.getConfigServer() ) {
                        errmsg = "specified a different configdb!";
                        return false;
                    }
                }
                else {
                    if ( ! authoritative ) {
                        result.appendBool( "need_authoritative" , true );
                        errmsg = "first setShardVersion";
                        return false;
                    }
                    shardingState.enable( configdb );
                    configServer.init( configdb );
                }
            }

            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }

            {
                // setting up ids
                if ( cmdObj["serverID"].type() != jstOID ) {
                    // TODO: fix this
                    //errmsg = "need serverID to be an OID";
                    //return 0;
                }
                else {
                    OID clientId = cmdObj["serverID"].__oid();
                    if ( ! info->hasID() ) {
                        info->setID( clientId );
                    }
                    else if ( clientId != info->getID() ) {
                        errmsg = "server id has changed!";
                        return 0;
                    }
                }
            }

            unsigned long long version = extractVersion( cmdObj["version"] , errmsg );

            if ( errmsg.size() ) {
                return false;
            }

            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to speciy fully namespace";
                return false;
            }

            const ConfigVersion oldVersion = info->getVersion(ns);
            const ConfigVersion globalVersion = shardingState.getVersion(ns);

            if ( oldVersion > 0 && globalVersion == 0 ) {
                // this had been reset
                info->setVersion( ns , 0 );
            }

            if ( version == 0 && globalVersion == 0 ) {
                // this connection is cleaning itself
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version == 0 && globalVersion > 0 ) {
                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    result.appendTimestamp( "globalVersion" , globalVersion );
                    result.appendTimestamp( "oldVersion" , oldVersion );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }
                log() << "wiping data for: " << ns << endl;
                result.appendTimestamp( "beforeDrop" , globalVersion );
                // only setting global version on purpose
                // need clients to re-find meta-data
                shardingState.resetVersion( ns );
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version < oldVersion ) {
                errmsg = "you already have a newer version of collection '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "oldVersion" , oldVersion );
                result.appendTimestamp( "newVersion" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }

            if ( version < globalVersion ) {
                while ( shardingState.inCriticalMigrateSection() ) {
                    dbtemprelease r;
                    sleepmillis(2);
                    OCCASIONALLY log() << "waiting till out of critical section" << endl;
                }
                errmsg = "going to older version for global for collection '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "version" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }

            if ( globalVersion == 0 && ! cmdObj.getBoolField( "authoritative" ) ) {
                // need authoritative for first look
                result.append( "ns" , ns );
                result.appendBool( "need_authoritative" , true );
                errmsg = "first time for collection '" + ns + "'";
                return false;
            }

            {
                dbtemprelease unlock;

                ShardChunkVersion currVersion = version;
                if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                    errmsg = str::stream() << "client version differs from config's for colleciton '" << ns << "'";
                    result.append( "ns" , ns );
                    result.appendTimestamp( "version" , version );
                    result.appendTimestamp( "globalVersion" , currVersion );
                    return false;
                }
            }

            info->setVersion( ns , version );
            result.appendTimestamp( "oldVersion" , oldVersion );
            result.append( "ok" , 1 );

            return true;
        }
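
Before accepting a new connection version, the command above walks a fixed ladder of cases: the connection cleaning itself up, a drop that must be authoritative, a stale client, a request older than the shard's global version, and a first contact that must be authoritative. A standalone sketch of that ladder reduced to plain integers; the Outcome names are invented for illustration:

// Standalone sketch (not MongoDB code) of the setShardVersion decision ladder,
// with versions reduced to plain integers.
#include <cstdint>
#include <iostream>

enum Outcome {
    Accepted,           // version adopted for this connection
    CleanedUp,          // version == 0 and nothing tracked: connection reset itself
    NeedAuthoritative,  // a drop or a first contact must come from an authoritative mongos
    StaleClient,        // the connection already holds a newer version
    GoingBackwards      // the requested version is older than the shard's global version
};

Outcome decide(uint64_t requested, uint64_t connVersion, uint64_t globalVersion,
               bool authoritative) {
    if (requested == 0 && globalVersion == 0) return CleanedUp;
    if (requested == 0 && globalVersion > 0)
        return authoritative ? Accepted /* wipe and reset */ : NeedAuthoritative;
    if (requested < connVersion)   return StaleClient;
    if (requested < globalVersion) return GoingBackwards;
    if (globalVersion == 0 && !authoritative) return NeedAuthoritative;
    return Accepted;
}

int main() {
    std::cout << decide(0, 5, 0, false) << "\n";   // 1: CleanedUp
    std::cout << decide(3, 0, 7, false) << "\n";   // 4: GoingBackwards
    std::cout << decide(7, 5, 7, false) << "\n";   // 0: Accepted
    return 0;
}
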
Example #12
namespace mongo {

    // -----ShardingState START ----

    ShardingState::ShardingState()
        : _enabled(false) , _mutex( "ShardingState" ) {
    }

    void ShardingState::enable( const string& server ) {
        _enabled = true;
        assert( server.size() );
        if ( _configServer.size() == 0 )
            _configServer = server;
        else {
            assert( server == _configServer );
        }
    }

    void ShardingState::gotShardName( const string& name ) {
        if ( _shardName.size() == 0 ) {
            // TODO SERVER-2299 verify the name is sound w.r.t IPs
            _shardName = name;
            return;
        }

        if ( _shardName == name )
            return;

        stringstream ss;
        ss << "gotShardName different than what i had before "
           << " before [" << _shardName << "] "
           << " got [" << name << "] "
           ;
        uasserted( 13298 , ss.str() );
    }

    void ShardingState::gotShardHost( string host ) {

        size_t slash = host.find( '/' );
        if ( slash != string::npos )
            host = host.substr( 0 , slash );

        if ( _shardHost.size() == 0 ) {
            _shardHost = host;
            return;
        }

        if ( _shardHost == host )
            return;

        stringstream ss;
        ss << "gotShardHost different than what i had before "
           << " before [" << _shardHost << "] "
           << " got [" << host << "] "
           ;
        uasserted( 13299 , ss.str() );
    }

    void ShardingState::resetShardingState() {
        scoped_lock lk(_mutex);
        
        _enabled = false;
        _configServer.clear();
        _shardName.clear();
        _shardHost.clear();
        _chunks.clear();
    }

    // TODO we shouldn't need three ways for checking the version. Fix this.
    bool ShardingState::hasVersion( const string& ns ) {
        scoped_lock lk(_mutex);

        ChunkManagersMap::const_iterator it = _chunks.find(ns);
        return it != _chunks.end();
    }

    bool ShardingState::hasVersion( const string& ns , ConfigVersion& version ) {
        scoped_lock lk(_mutex);

        ChunkManagersMap::const_iterator it = _chunks.find(ns);
        if ( it == _chunks.end() )
            return false;

        ShardChunkManagerPtr p = it->second;
        version = p->getVersion();
        return true;
    }

    const ConfigVersion ShardingState::getVersion( const string& ns ) const {
        scoped_lock lk(_mutex);

        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        if ( it != _chunks.end() ) {
            ShardChunkManagerPtr p = it->second;
            return p->getVersion();
        }
        else {
            return 0;
        }
    }

    void ShardingState::donateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ) {
        scoped_lock lk( _mutex );

        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        assert( it != _chunks.end() ) ;
        ShardChunkManagerPtr p = it->second;

        // empty shards should have version 0
        version = ( p->getNumChunks() > 1 ) ? version : ShardChunkVersion( 0 , 0 );

        ShardChunkManagerPtr cloned( p->cloneMinus( min , max , version ) );
        _chunks[ns] = cloned;
    }

    void ShardingState::undoDonateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ) {
        scoped_lock lk( _mutex );

        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        assert( it != _chunks.end() ) ;
        ShardChunkManagerPtr p( it->second->clonePlus( min , max , version ) );
        _chunks[ns] = p;
    }

    void ShardingState::splitChunk( const string& ns , const BSONObj& min , const BSONObj& max , const vector<BSONObj>& splitKeys ,
                                    ShardChunkVersion version ) {
        scoped_lock lk( _mutex );

        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        assert( it != _chunks.end() ) ;
        ShardChunkManagerPtr p( it->second->cloneSplit( min , max , splitKeys , version ) );
        _chunks[ns] = p;
    }

    void ShardingState::resetVersion( const string& ns ) {
        scoped_lock lk( _mutex );

        _chunks.erase( ns );
    }

    bool ShardingState::trySetVersion( const string& ns , ConfigVersion& version /* IN-OUT */ ) {

        // fast path - requested version is at the same version as this chunk manager
        //
        // cases:
        //   + this shard updated the version for a migrate's commit (FROM side)
        //     a client reloaded chunk state from config and picked the newest version
        //   + two clients reloaded
        //     one triggered the 'slow path' (below)
        //     when the second's request gets here, the version is already current
        {
            scoped_lock lk( _mutex );
            ChunkManagersMap::const_iterator it = _chunks.find( ns );
            if ( it != _chunks.end() && it->second->getVersion() == version )
                return true;
        }

        // slow path - the requested version differs from the current chunk manager's (if one exists), so we must
        // check for the newest version in the config server
        // (a standalone sketch of this reload pattern follows this example)
        //
        // cases:
        //   + a chunk moved TO here
        //     (we don't bump up the version on the TO side but the commit to config does use higher version)
        //     a client reloads from config and issues the request
        //   + there was a takeover from a secondary
        //     the secondary had no state (managers) at all, so every client request will fall here
        //   + a stale client requests a version that's not current anymore

        const string c = (_configServer == _shardHost) ? "" /* local */ : _configServer;
        ShardChunkManagerPtr p( new ShardChunkManager( c , ns , _shardName ) );
        {
            scoped_lock lk( _mutex );

            // since we loaded the chunk manager unlocked, another thread may have done the same
            // make sure we keep the freshest config info only
            ChunkManagersMap::const_iterator it = _chunks.find( ns );
            if ( it == _chunks.end() || p->getVersion() >= it->second->getVersion() ) {
                _chunks[ns] = p;
            }

            ShardChunkVersion oldVersion = version;
            version = p->getVersion();
            return oldVersion == version;
        }
    }

    void ShardingState::appendInfo( BSONObjBuilder& b ) {
        b.appendBool( "enabled" , _enabled );
        if ( ! _enabled )
            return;

        b.append( "configServer" , _configServer );
        b.append( "shardName" , _shardName );
        b.append( "shardHost" , _shardHost );

        {
            BSONObjBuilder bb( b.subobjStart( "versions" ) );

            scoped_lock lk(_mutex);

            for ( ChunkManagersMap::iterator it = _chunks.begin(); it != _chunks.end(); ++it ) {
                ShardChunkManagerPtr p = it->second;
                bb.appendTimestamp( it->first , p->getVersion() );
            }
            bb.done();
        }

    }

    bool ShardingState::needShardChunkManager( const string& ns ) const {
        if ( ! _enabled )
            return false;

        if ( ! ShardedConnectionInfo::get( false ) )
            return false;

        return true;
    }

    ShardChunkManagerPtr ShardingState::getShardChunkManager( const string& ns ) {
        scoped_lock lk( _mutex );

        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        if ( it == _chunks.end() ) {
            return ShardChunkManagerPtr();
        }
        else {
            return it->second;
        }
    }

    ShardingState shardingState;

    // -----ShardingState END ----

    // -----ShardedConnectionInfo START ----

    boost::thread_specific_ptr<ShardedConnectionInfo> ShardedConnectionInfo::_tl;

    ShardedConnectionInfo::ShardedConnectionInfo() {
        _forceVersionOk = false;
        _id.clear();
    }

    ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ) {
        ShardedConnectionInfo* info = _tl.get();
        if ( ! info && create ) {
            log(1) << "entering shard mode for connection" << endl;
            info = new ShardedConnectionInfo();
            _tl.reset( info );
        }
        return info;
    }

    void ShardedConnectionInfo::reset() {
        _tl.reset();
    }

    const ConfigVersion ShardedConnectionInfo::getVersion( const string& ns ) const {
        NSVersionMap::const_iterator it = _versions.find( ns );
        if ( it != _versions.end() ) {
            return it->second;
        }
        else {
            return 0;
        }
    }

    void ShardedConnectionInfo::setVersion( const string& ns , const ConfigVersion& version ) {
        _versions[ns] = version;
    }

    void ShardedConnectionInfo::setID( const OID& id ) {
        _id = id;
    }

    // -----ShardedConnectionInfo END ----

    unsigned long long extractVersion( BSONElement e , string& errmsg ) {
        if ( e.eoo() ) {
            errmsg = "no version";
            return 0;
        }

        if ( e.isNumber() )
            return (unsigned long long)e.number();

        if ( e.type() == Date || e.type() == Timestamp )
            return e._numberLong();


        errmsg = "version is not a numeric type";
        return 0;
    }

    class MongodShardCommand : public Command {
    public:
        MongodShardCommand( const char * n ) : Command( n ) {
        }
        virtual bool slaveOk() const {
            return false;
        }
        virtual bool adminOnly() const {
            return true;
        }
    };


    bool haveLocalShardingInfo( const string& ns ) {
        if ( ! shardingState.enabled() )
            return false;

        if ( ! shardingState.hasVersion( ns ) )
            return false;

        return ShardedConnectionInfo::get(false) != 0;
    }

    class UnsetShardingCommand : public MongodShardCommand {
    public:
        UnsetShardingCommand() : MongodShardCommand("unsetSharding") {}

        virtual void help( stringstream& help ) const {
            help << " example: { unsetSharding : 1 } ";
        }

        virtual LockType locktype() const { return NONE; }

        virtual bool slaveOk() const { return true; }

        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
            ShardedConnectionInfo::reset();
            return true;
        }

    } unsetShardingCommand;

    class SetShardVersion : public MongodShardCommand {
    public:
        SetShardVersion() : MongodShardCommand("setShardVersion") {}

        virtual void help( stringstream& help ) const {
            help << " example: { setShardVersion : 'alleyinsider.foo' , version : 1 , configdb : '' } ";
        }

        virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock

        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            bool authoritative = cmdObj.getBoolField( "authoritative" );

            string configdb = cmdObj["configdb"].valuestrsafe();
            {
                // configdb checking
                if ( configdb.size() == 0 ) {
                    errmsg = "no configdb";
                    return false;
                }

                if ( shardingState.enabled() ) {
                    if ( configdb != shardingState.getConfigServer() ) {
                        errmsg = "specified a different configdb!";
                        return false;
                    }
                }
                else {
                    if ( ! authoritative ) {
                        result.appendBool( "need_authoritative" , true );
                        errmsg = "first setShardVersion";
                        return false;
                    }
                    shardingState.enable( configdb );
                    configServer.init( configdb );
                }
            }

            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }

            {
                // setting up ids
                if ( cmdObj["serverID"].type() != jstOID ) {
                    // TODO: fix this
                    //errmsg = "need serverID to be an OID";
                    //return 0;
                }
                else {
                    OID clientId = cmdObj["serverID"].__oid();
                    if ( ! info->hasID() ) {
                        info->setID( clientId );
                    }
                    else if ( clientId != info->getID() ) {
                        errmsg = "server id has changed!";
                        return 0;
                    }
                }
            }

            unsigned long long version = extractVersion( cmdObj["version"] , errmsg );

            if ( errmsg.size() ) {
                return false;
            }

            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to speciy fully namespace";
                return false;
            }

            const ConfigVersion oldVersion = info->getVersion(ns);
            const ConfigVersion globalVersion = shardingState.getVersion(ns);

            if ( oldVersion > 0 && globalVersion == 0 ) {
                // this had been reset
                info->setVersion( ns , 0 );
            }

            if ( version == 0 && globalVersion == 0 ) {
                // this connection is cleaning itself
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version == 0 && globalVersion > 0 ) {
                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    result.appendTimestamp( "globalVersion" , globalVersion );
                    result.appendTimestamp( "oldVersion" , oldVersion );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }
                log() << "wiping data for: " << ns << endl;
                result.appendTimestamp( "beforeDrop" , globalVersion );
                // only setting global version on purpose
                // need clients to re-find meta-data
                shardingState.resetVersion( ns );
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version < oldVersion ) {
                errmsg = "you already have a newer version of collection '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "oldVersion" , oldVersion );
                result.appendTimestamp( "newVersion" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }

            if ( version < globalVersion ) {
                while ( shardingState.inCriticalMigrateSection() ) {
                    dbtemprelease r;
                    sleepmillis(2);
                    OCCASIONALLY log() << "waiting till out of critical section" << endl;
                }
                errmsg = "going to older version for global for collection '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "version" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }

            if ( globalVersion == 0 && ! cmdObj.getBoolField( "authoritative" ) ) {
                // need authoritative for first look
                result.append( "ns" , ns );
                result.appendBool( "need_authoritative" , true );
                errmsg = "first time for collection '" + ns + "'";
                return false;
            }

            {
                dbtemprelease unlock;

                ShardChunkVersion currVersion = version;
                if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                    errmsg = str::stream() << "client version differs from config's for colleciton '" << ns << "'";
                    result.append( "ns" , ns );
                    result.appendTimestamp( "version" , version );
                    result.appendTimestamp( "globalVersion" , currVersion );
                    return false;
                }
            }

            info->setVersion( ns , version );
            result.appendTimestamp( "oldVersion" , oldVersion );
            result.append( "ok" , 1 );

            return true;
        }

    } setShardVersionCmd;

    class GetShardVersion : public MongodShardCommand {
    public:
        GetShardVersion() : MongodShardCommand("getShardVersion") {}

        virtual void help( stringstream& help ) const {
            help << " example: { getShardVersion : 'alleyinsider.foo'  } ";
        }

        virtual LockType locktype() const { return NONE; }

        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
            string ns = cmdObj["getShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to speciy fully namespace";
                return false;
            }

            result.append( "configServer" , shardingState.getConfigServer() );

            result.appendTimestamp( "global" , shardingState.getVersion(ns) );

            ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );
            if ( info )
                result.appendTimestamp( "mine" , info->getVersion(ns) );
            else
                result.appendTimestamp( "mine" , 0 );

            return true;
        }

    } getShardVersion;

    class ShardingStateCmd : public MongodShardCommand {
    public:
        ShardingStateCmd() : MongodShardCommand( "shardingState" ) {}

        virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock

        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
            shardingState.appendInfo( result );
            return true;
        }

    } shardingStateCmd;

    /**
     * @return true if not in sharded mode
     *         or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , bool isWriteOp , string& errmsg ) {
        if ( ! shardingState.enabled() )
            return true;

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ) {
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which I think is the correct behavior
            return true;
        }

        if ( info->inForceVersionOkMode() ) {
            return true;
        }

        // TODO
        //   all collections, sharded or not, will at some point have a version (and a ShardChunkManager)
        //   for now, we remove the sharding state of dropped collections,
        //   so delayed requests may come in. This has to be fixed.
        ConfigVersion clientVersion = info->getVersion(ns);
        ConfigVersion version;
        if ( ! shardingState.hasVersion( ns , version ) && clientVersion == 0 ) {
            return true;
        }


        if ( version == 0 && clientVersion > 0 ) {
            stringstream ss;
            ss << "collection was dropped or this shard no longer valied version: " << version << " clientVersion: " << clientVersion;
            errmsg = ss.str();
            return false;
        }

        if ( clientVersion >= version )
            return true;


        if ( clientVersion == 0 ) {
            stringstream ss;
            ss << "client in sharded mode, but doesn't have version set for this collection: " << ns << " myVersion: " << version;
            errmsg = ss.str();
            return false;
        }

        if ( version.majorVersion() == clientVersion.majorVersion() ) {
            // this means there was just a split
            // since on a split w/o a migrate this server is ok
            // going to accept the operation
            return true;
        }

        stringstream ss;
        ss << "your version is too old  ns: " + ns << " global: " << version << " client: " << clientVersion;
        errmsg = ss.str();
        return false;
    }

}
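
trySetVersion above checks the cached manager under the mutex, reloads from the config server with the lock released, then re-checks and installs only the freshest copy. A standalone sketch of that double-checked reload; the types (Manager, ManagerPtr) are invented stand-ins for ShardChunkManager:

// Standalone sketch (not MongoDB code) of the reload pattern in trySetVersion:
// fast path under the lock, slow reload without the lock, then re-check and
// keep only the freshest copy when installing.
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Manager { uint64_t version; };
using ManagerPtr = std::shared_ptr<Manager>;

static std::mutex mtx;
static std::map<std::string, ManagerPtr> cache;

// stand-in for the expensive load from the config server, done unlocked
static ManagerPtr loadFromConfig(const std::string& ns) {
    (void)ns;
    return std::make_shared<Manager>(Manager{42});
}

bool trySetVersion(const std::string& ns, uint64_t& version /* in-out */) {
    {   // fast path: already at the requested version
        std::lock_guard<std::mutex> lk(mtx);
        std::map<std::string, ManagerPtr>::const_iterator it = cache.find(ns);
        if (it != cache.end() && it->second->version == version)
            return true;
    }

    ManagerPtr fresh = loadFromConfig(ns);           // slow path, no lock held

    std::lock_guard<std::mutex> lk(mtx);
    std::map<std::string, ManagerPtr>::const_iterator it = cache.find(ns);
    // another thread may have reloaded concurrently; keep the freshest only
    if (it == cache.end() || fresh->version >= it->second->version)
        cache[ns] = fresh;

    uint64_t requested = version;
    version = fresh->version;                        // report what we actually have
    return requested == version;
}

int main() {
    uint64_t v = 42;
    return trySetVersion("test.foo", v) ? 0 : 1;     // 0: loaded version matches the request
}
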
Example #13
/**
 * @return true if not in sharded mode
 *         or if the version for this client is ok
 */
bool shardVersionOk( const string& ns , string& errmsg, ConfigVersion& received, ConfigVersion& wanted ) {
    if ( ! shardingState.enabled() )
        return true;

    if ( ! isMasterNs( ns.c_str() ) )  {
        // right now connections to secondaries aren't versioned at all
        return true;
    }

    ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

    if ( ! info ) {
        // this means the client has nothing sharded
        // so this allows direct connections to do whatever they want
        // which I think is the correct behavior
        return true;
    }

    if ( info->inForceVersionOkMode() ) {
        return true;
    }

    // TODO
    //   all collections, sharded or not, will at some point have a version (and a ShardChunkManager)
    //   for now, we remove the sharding state of dropped collections,
    //   so delayed requests may come in. This has to be fixed.
    ConfigVersion clientVersion = info->getVersion(ns);
    ConfigVersion version;
    if ( ! shardingState.hasVersion( ns , version ) && ! clientVersion.isSet() ) {
        return true;
    }

    // The versions we're going to compare, saved for future use
    received = clientVersion;
    wanted = version;

    if ( ! version.isSet() && clientVersion.isSet() ) {
        stringstream ss;
        ss << "collection was dropped or this shard no longer valid version";
        errmsg = ss.str();
        return false;
    }

    if ( clientVersion >= version )
        return true;


    if ( ! clientVersion.isSet() ) {
        stringstream ss;
        ss << "client in sharded mode, but doesn't have version set for this collection";
        errmsg = ss.str();
        return false;
    }

    if ( version.majorVersion() == clientVersion.majorVersion() ) {
        // this means there was just a split
        // since on a split w/o a migrate this server is ok
        // going to accept the operation
        return true;
    }

    stringstream ss;
    ss << "your version is too old";
    errmsg = ss.str();
    return false;
}
Example #14
namespace mongo {

    // -----ShardingState START ----
    
    ShardingState::ShardingState()
        : _enabled(false) , _mutex( "ShardingState" ){
    }
    
    void ShardingState::enable( const string& server ){
        _enabled = true;
        assert( server.size() );
        if ( _configServer.size() == 0 )
            _configServer = server;
        else {
            assert( server == _configServer );
        }
    }
    
    void ShardingState::gotShardName( const string& name ){
        if ( _shardName.size() == 0 ){
            _shardName = name;
            return;
        }
        
        if ( _shardName == name )
            return;

        stringstream ss;
        ss << "gotShardName different than what i had before " 
           << " before [" << _shardName << "] " 
           << " got [" << name << "] " 
            ;
        uasserted( 13298 , ss.str() );
    }
    
    void ShardingState::gotShardHost( string host ){
        
        size_t slash = host.find( '/' );
        if ( slash != string::npos )
            host = host.substr( 0 , slash );

        if ( _shardHost.size() == 0 ){
            _shardHost = host;
            return;
        }
        
        if ( _shardHost == host )
            return;

        stringstream ss;
        ss << "gotShardHost different than what i had before " 
           << " before [" << _shardHost << "] " 
           << " got [" << host << "] " 
            ;
        uasserted( 13299 , ss.str() );
    }
    
    bool ShardingState::hasVersion( const string& ns ){
        scoped_lock lk(_mutex);

        NSVersionMap::const_iterator i = _versions.find(ns);
        return i != _versions.end();
    }
    
    bool ShardingState::hasVersion( const string& ns , ConfigVersion& version ){
        scoped_lock lk(_mutex);

        NSVersionMap::const_iterator i = _versions.find(ns);
        if ( i == _versions.end() )
            return false;
        version = i->second;
        return true;
    }
    
    const ConfigVersion ShardingState::getVersion( const string& ns ) const {
        scoped_lock lk(_mutex);

        NSVersionMap::const_iterator it = _versions.find( ns );
        if ( it != _versions.end() ) {
            return it->second;
        } else {
            return 0;
        }
    }
    
    void ShardingState::setVersion( const string& ns , const ConfigVersion& version ){
        scoped_lock lk(_mutex);

        if ( version != 0 ) {
            NSVersionMap::const_iterator it = _versions.find( ns );

            // TODO 11-18-2010 as we're bringing chunk boundary information to mongod, it may happen that
            // we're setting a version for the ns that the shard knows about already (e.g. because it set
            // it itself in a chunk migration)
            // eventually, the only cases to issue a setVersion would be 
            // 1) First chunk of a collection, for version 1|0
            // 2) Drop of a collection, for version 0|0
            // 3) Load of the shard's chunk state, in a primary-secondary failover
            assert( it == _versions.end() || version >= it->second );
        }

        _versions[ns] = version;
    }

    void ShardingState::appendInfo( BSONObjBuilder& b ){
        b.appendBool( "enabled" , _enabled );
        if ( ! _enabled )
            return;

        b.append( "configServer" , _configServer );
        b.append( "shardName" , _shardName );
        b.append( "shardHost" , _shardHost );

        {
            BSONObjBuilder bb( b.subobjStart( "versions" ) );
            
            scoped_lock lk(_mutex);

            for ( NSVersionMap::iterator i=_versions.begin(); i!=_versions.end(); ++i ){
                bb.appendTimestamp( i->first , i->second );
            }
            bb.done();
        }

    }

    bool ShardingState::needShardChunkManager( const string& ns ) const {
        if ( ! _enabled )
            return false;
        
        if ( ! ShardedConnectionInfo::get( false ) )
            return false;

        return true;
    }

    ShardChunkManagerPtr ShardingState::getShardChunkManager( const string& ns ){
        ConfigVersion version;
        { 
            // check cache
            scoped_lock lk( _mutex );

            NSVersionMap::const_iterator it = _versions.find( ns );
            if ( it == _versions.end() ) {
                return ShardChunkManagerPtr();
            }

            version = it->second;

            // TODO SERVER-1849 pending drop work
            // the manager should use the cached version only if the versions match exactly
            ShardChunkManagerPtr p = _chunks[ns];
            if ( p && p->getVersion() >= version ){
                // our cached version is good, so just return
                return p;                
            }
        }

        // load the chunk information for this shard from the config database
        // a reminder: ShardChunkManager may throw on construction
        const string c = (_configServer == _shardHost) ? "" /* local */ : _configServer;
        ShardChunkManagerPtr p( new ShardChunkManager( c , ns , _shardName ) );

        // TODO SERVER-1849 verify that the manager's version is exactly the one requested
        // If not, do update _chunks, but fail the request.
        { 
            scoped_lock lk( _mutex );
            _chunks[ns] = p;
        }

        return p;
    }

    ShardingState shardingState;

    // -----ShardingState END ----
    
    // -----ShardedConnectionInfo START ----

    boost::thread_specific_ptr<ShardedConnectionInfo> ShardedConnectionInfo::_tl;

    ShardedConnectionInfo::ShardedConnectionInfo(){
        _forceVersionOk = false;
        _id.clear();
    }
    
    ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ){
        ShardedConnectionInfo* info = _tl.get();
        if ( ! info && create ){
            log(1) << "entering shard mode for connection" << endl;
            info = new ShardedConnectionInfo();
            _tl.reset( info );
        }
        return info;
    }

    void ShardedConnectionInfo::reset(){
        _tl.reset();
    }

    const ConfigVersion ShardedConnectionInfo::getVersion( const string& ns ) const {
        NSVersionMap::const_iterator it = _versions.find( ns );
        if ( it != _versions.end() ) {
            return it->second;
        } else {
            return 0;
        }
    }
    
    void ShardedConnectionInfo::setVersion( const string& ns , const ConfigVersion& version ){
        _versions[ns] = version;
    }

    void ShardedConnectionInfo::setID( const OID& id ){
        _id = id;
    }

    // -----ShardedConnectionInfo END ----

    unsigned long long extractVersion( BSONElement e , string& errmsg ){
        if ( e.eoo() ){
            errmsg = "no version";
            return 0;
        }
        
        if ( e.isNumber() )
            return (unsigned long long)e.number();
        
        if ( e.type() == Date || e.type() == Timestamp )
            return e._numberLong();

        
        errmsg = "version is not a numeric type";
        return 0;
    }

    class MongodShardCommand : public Command {
    public:
        MongodShardCommand( const char * n ) : Command( n ){
        }
        virtual bool slaveOk() const {
            return false;
        }
        virtual bool adminOnly() const {
            return true;
        }
    };
    
    
    bool haveLocalShardingInfo( const string& ns ){
        if ( ! shardingState.enabled() )
            return false;
        
        if ( ! shardingState.hasVersion( ns ) )
            return false;

        return ShardedConnectionInfo::get(false) != 0;
    }

    class UnsetShardingCommand : public MongodShardCommand {
    public:
        UnsetShardingCommand() : MongodShardCommand("unsetSharding"){}

        virtual void help( stringstream& help ) const {
            help << " example: { unsetSharding : 1 } ";
        }
        
        virtual LockType locktype() const { return NONE; } 
 
        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
            ShardedConnectionInfo::reset();
            return true;
        } 
    
    } unsetShardingCommand;

    
    class SetShardVersion : public MongodShardCommand {
    public:
        SetShardVersion() : MongodShardCommand("setShardVersion"){}

        virtual void help( stringstream& help ) const {
            help << " example: { setShardVersion : 'alleyinsider.foo' , version : 1 , configdb : '' } ";
        }
        
        virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock
 
        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            bool authoritative = cmdObj.getBoolField( "authoritative" );

            string configdb = cmdObj["configdb"].valuestrsafe();
            { // configdb checking
                if ( configdb.size() == 0 ){
                    errmsg = "no configdb";
                    return false;
                }
                
                if ( shardingState.enabled() ){
                    if ( configdb != shardingState.getConfigServer() ){
                        errmsg = "specified a different configdb!";
                        return false;
                    }
                }
                else {
                    if ( ! authoritative ){
                        result.appendBool( "need_authoritative" , true );
                        errmsg = "first setShardVersion";
                        return false;
                    }
                    shardingState.enable( configdb );
                    configServer.init( configdb );
                }
            }

            if ( cmdObj["shard"].type() == String ){
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }

            { // setting up ids
                if ( cmdObj["serverID"].type() != jstOID ){
                    // TODO: fix this
                    //errmsg = "need serverID to be an OID";
                    //return 0;
                }
                else {
                    OID clientId = cmdObj["serverID"].__oid();
                    if ( ! info->hasID() ){
                        info->setID( clientId );
                    }
                    else if ( clientId != info->getID() ){
                        errmsg = "server id has changed!";
                        return 0;
                    }
                }
            }

            unsigned long long version = extractVersion( cmdObj["version"] , errmsg );

            if ( errmsg.size() ){
                return false;
            }
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ){
                errmsg = "need to speciy fully namespace";
                return false;
            }
            
            const ConfigVersion oldVersion = info->getVersion(ns);
            const ConfigVersion globalVersion = shardingState.getVersion(ns);
            
            if ( oldVersion > 0 && globalVersion == 0 ){
                // this had been reset
                info->setVersion( ns , 0 );
            }

            if ( version == 0 && globalVersion == 0 ){
                // this connection is cleaning itself
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version == 0 && globalVersion > 0 ){
                if ( ! authoritative ){
                    result.appendBool( "need_authoritative" , true );
                    result.appendTimestamp( "globalVersion" , globalVersion );
                    result.appendTimestamp( "oldVersion" , oldVersion );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }
                log() << "wiping data for: " << ns << endl;
                result.appendTimestamp( "beforeDrop" , globalVersion );
                // only setting global version on purpose
                // need clients to re-find meta-data
                shardingState.setVersion( ns , 0 );
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version < oldVersion ){
                errmsg = "you already have a newer version";
                result.appendTimestamp( "oldVersion" , oldVersion );
                result.appendTimestamp( "newVersion" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }
            
            if ( version < globalVersion ){
                while ( shardingState.inCriticalMigrateSection() ){
                    dbtemprelease r;
                    sleepmillis(2);
                    log() << "waiting till out of critical section" << endl;
                }
                errmsg = "going to older version for global";
                result.appendTimestamp( "version" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }
            
            if ( globalVersion == 0 && ! cmdObj.getBoolField( "authoritative" ) ){
                // need authoritative for first look
                result.appendBool( "need_authoritative" , true );
                result.append( "ns" , ns );
                errmsg = "first time for this ns";
                return false;
            }

            result.appendTimestamp( "oldVersion" , oldVersion );
            result.append( "ok" , 1 );

            info->setVersion( ns , version );
            shardingState.setVersion( ns , version );

            // TODO SERVER-1849 pending drop work
            // getShardChunkManager assumes that the setVersion calls above were valid
            // ideally, we'd call getShardChunkManager first, verify that 'version' is sound, and then update
            // connection and global state
            {
                dbtemprelease unlock;
                shardingState.getShardChunkManager( ns );
            }

            return true;
        }
        
    } setShardVersionCmd;
    
    class GetShardVersion : public MongodShardCommand {
    public:
        GetShardVersion() : MongodShardCommand("getShardVersion"){}

        virtual void help( stringstream& help ) const {
            help << " example: { getShardVersion : 'alleyinsider.foo'  } ";
        }
        
        virtual LockType locktype() const { return NONE; } 

        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
            string ns = cmdObj["getShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ){
                errmsg = "need to speciy fully namespace";
                return false;
            }
            
            result.append( "configServer" , shardingState.getConfigServer() );

            result.appendTimestamp( "global" , shardingState.getVersion(ns) );
            
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );
            if ( info )
                result.appendTimestamp( "mine" , info->getVersion(ns) );
            else 
                result.appendTimestamp( "mine" , 0 );
            
            return true;
        }
        
    } getShardVersion;

    class ShardingStateCmd : public MongodShardCommand {
    public:
        ShardingStateCmd() : MongodShardCommand( "shardingState" ){}

        virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock

        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){        
            shardingState.appendInfo( result );
            return true;
        }
        
    } shardingStateCmd;

    /**
     * @return true if not in sharded mode
     *         or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , bool isWriteOp , string& errmsg ){
        if ( ! shardingState.enabled() )
            return true;

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ){
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which I think is the correct behavior
            return true;
        }
        
        if ( info->inForceVersionOkMode() ){
            return true;
        }

        ConfigVersion version;    
        if ( ! shardingState.hasVersion( ns , version ) ){
            return true;
        }

        ConfigVersion clientVersion = info->getVersion(ns);

        if ( version == 0 && clientVersion > 0 ){
            stringstream ss;
            ss << "collection was dropped or this shard no longer valied version: " << version << " clientVersion: " << clientVersion;
            errmsg = ss.str();
            return false;
        }
        
        if ( clientVersion >= version )
            return true;
        

        if ( clientVersion == 0 ){
            stringstream ss;
            ss << "client in sharded mode, but doesn't have version set for this collection: " << ns << " myVersion: " << version;
            errmsg = ss.str();
            return false;
        }

        if ( isWriteOp && version.majorVersion() == clientVersion.majorVersion() ){
            // this means there was just a split
            // on a split without a migrate this server is still ok
            // so accept the write
            return true;
        }

        stringstream ss;
        ss << "your version is too old  ns: " + ns << " global: " << version << " client: " << clientVersion;
        errmsg = ss.str();
        return false;
    }

}
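The shardVersionOk() checks above reduce to a handful of comparison rules on chunk versions. Below is a minimal standalone sketch of those rules (plain C++, not MongoDB code; the 64-bit major/minor packing and all helper names are assumptions used only for illustration, mirroring the majorVersion() and timestamp usage above).

#include <iostream>
#include <string>

// Hypothetical stand-in for a chunk version: major in the high 32 bits,
// minor in the low 32 bits (an assumed packing, for illustration only).
typedef unsigned long long ToyVersion;

static ToyVersion makeVersion( unsigned major , unsigned minor ) {
    return ( static_cast<ToyVersion>( major ) << 32 ) | minor;
}

static unsigned majorOf( ToyVersion v ) {
    return static_cast<unsigned>( v >> 32 );
}

// mirrors the decision order of shardVersionOk() above, in simplified form
static bool toyShardVersionOk( ToyVersion global , ToyVersion client , bool isWriteOp , std::string& errmsg ) {
    if ( global == 0 && client > 0 ) {
        errmsg = "collection was dropped or this shard no longer has a valid version";
        return false;
    }
    if ( client >= global )
        return true;                       // client is current (or ahead)
    if ( client == 0 ) {
        errmsg = "client has no version set for this collection";
        return false;
    }
    if ( isWriteOp && majorOf( global ) == majorOf( client ) )
        return true;                       // split without a migrate: still safe to accept the write
    errmsg = "your version is too old";
    return false;
}

int main() {
    std::string err;
    // split only: global 3|5 vs client 3|2 -> write accepted
    std::cout << toyShardVersionOk( makeVersion( 3 , 5 ) , makeVersion( 3 , 2 ) , true , err ) << std::endl;
    // migrate happened: global 4|0 vs client 3|2 -> rejected
    std::cout << toyShardVersionOk( makeVersion( 4 , 0 ) , makeVersion( 3 , 2 ) , true , err ) << " " << err << std::endl;
    return 0;
}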
Example #15
0
bool mergeChunks(OperationContext* txn,
                 const NamespaceString& nss,
                 const BSONObj& minKey,
                 const BSONObj& maxKey,
                 const OID& epoch,
                 string* errMsg) {
    // Get the distributed lock
    string whyMessage = stream() << "merging chunks in " << nss.ns() << " from " << minKey << " to "
                                 << maxKey;
    auto scopedDistLock = grid.catalogManager(txn)->distLock(
        txn, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout);

    if (!scopedDistLock.isOK()) {
        *errMsg = stream() << "could not acquire collection lock for " << nss.ns()
                           << " to merge chunks in [" << minKey << "," << maxKey << ")"
                           << causedBy(scopedDistLock.getStatus());

        warning() << *errMsg;
        return false;
    }

    ShardingState* shardingState = ShardingState::get(txn);

    //
    // We now have the collection lock, refresh metadata to latest version and sanity check
    //

    ChunkVersion shardVersion;
    Status status = shardingState->refreshMetadataNow(txn, nss.ns(), &shardVersion);

    if (!status.isOK()) {
        *errMsg = str::stream() << "could not merge chunks, failed to refresh metadata for "
                                << nss.ns() << causedBy(status.reason());

        warning() << *errMsg;
        return false;
    }

    if (epoch.isSet() && shardVersion.epoch() != epoch) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns() << " has changed"
                           << " since merge was sent"
                           << "(sent epoch : " << epoch.toString()
                           << ", current epoch : " << shardVersion.epoch().toString() << ")";

        warning() << *errMsg;
        return false;
    }

    shared_ptr<CollectionMetadata> metadata = shardingState->getCollectionMetadata(nss.ns());

    if (!metadata || metadata->getKeyPattern().isEmpty()) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " is not sharded";

        warning() << *errMsg;
        return false;
    }

    dassert(metadata->getShardVersion().equals(shardVersion));

    if (!metadata->isValidKey(minKey) || !metadata->isValidKey(maxKey)) {
        *errMsg = stream() << "could not merge chunks, the range " << rangeToString(minKey, maxKey)
                           << " is not valid"
                           << " for collection " << nss.ns() << " with key pattern "
                           << metadata->getKeyPattern();

        warning() << *errMsg;
        return false;
    }

    //
    // Get merged chunk information
    //

    ChunkVersion mergeVersion = metadata->getCollVersion();
    mergeVersion.incMinor();

    std::vector<ChunkType> chunksToMerge;

    ChunkType itChunk;
    itChunk.setMin(minKey);
    itChunk.setMax(minKey);
    itChunk.setNS(nss.ns());
    itChunk.setShard(shardingState->getShardName());

    while (itChunk.getMax().woCompare(maxKey) < 0 &&
           metadata->getNextChunk(itChunk.getMax(), &itChunk)) {
        chunksToMerge.push_back(itChunk);
    }

    if (chunksToMerge.empty()) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " and ending at " << maxKey
                           << " does not belong to shard " << shardingState->getShardName();

        warning() << *errMsg;
        return false;
    }

    //
    // Validate the range starts and ends at chunks and has no holes, error if not valid
    //

    BSONObj firstDocMin = chunksToMerge.front().getMin();
    BSONObj firstDocMax = chunksToMerge.front().getMax();
    // minKey is inclusive
    bool minKeyInRange = rangeContains(firstDocMin, firstDocMax, minKey);

    if (!minKeyInRange) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " does not belong to shard "
                           << shardingState->getShardName();

        warning() << *errMsg;
        return false;
    }

    BSONObj lastDocMin = chunksToMerge.back().getMin();
    BSONObj lastDocMax = chunksToMerge.back().getMax();
    // maxKey is exclusive
    bool maxKeyInRange = lastDocMin.woCompare(maxKey) < 0 && lastDocMax.woCompare(maxKey) >= 0;

    if (!maxKeyInRange) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range ending at " << maxKey << " does not belong to shard "
                           << shardingState->getShardName();

        warning() << *errMsg;
        return false;
    }

    bool validRangeStartKey = firstDocMin.woCompare(minKey) == 0;
    bool validRangeEndKey = lastDocMax.woCompare(maxKey) == 0;

    if (!validRangeStartKey || !validRangeEndKey) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " does not contain a chunk "
                           << (!validRangeStartKey ? "starting at " + minKey.toString() : "")
                           << (!validRangeStartKey && !validRangeEndKey ? " or " : "")
                           << (!validRangeEndKey ? "ending at " + maxKey.toString() : "");

        warning() << *errMsg;
        return false;
    }

    if (chunksToMerge.size() == 1) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " already contains chunk for " << rangeToString(minKey, maxKey);

        warning() << *errMsg;
        return false;
    }

    // Look for hole in range
    for (size_t i = 1; i < chunksToMerge.size(); ++i) {
        if (chunksToMerge[i - 1].getMax().woCompare(chunksToMerge[i].getMin()) != 0) {
            *errMsg =
                stream() << "could not merge chunks, collection " << nss.ns()
                         << " has a hole in the range " << rangeToString(minKey, maxKey) << " at "
                         << rangeToString(chunksToMerge[i - 1].getMax(), chunksToMerge[i].getMin());

            warning() << *errMsg;
            return false;
        }
    }

    //
    // Run apply ops command
    //
    Status applyOpsStatus = runApplyOpsCmd(txn, chunksToMerge, shardVersion, mergeVersion);
    if (!applyOpsStatus.isOK()) {
        warning() << applyOpsStatus;
        return false;
    }

    //
    // Install merged chunk metadata
    //

    {
        ScopedTransaction transaction(txn, MODE_IX);
        Lock::DBLock writeLk(txn->lockState(), nss.db(), MODE_IX);
        Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_X);

        shardingState->mergeChunks(txn, nss.ns(), minKey, maxKey, mergeVersion);
    }

    //
    // Log change
    //

    BSONObj mergeLogEntry = buildMergeLogEntry(chunksToMerge, shardVersion, mergeVersion);

    grid.catalogManager(txn)->logChange(txn, "merge", nss.ns(), mergeLogEntry);

    return true;
}
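The validation in mergeChunks() above boils down to three checks over the gathered [min, max) chunk ranges: the merged range must start and end exactly on chunk boundaries, and adjacent chunks may not leave a hole. A minimal standalone sketch of those checks on integer bounds (toy types only, not the BSON shard-key bounds used above):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// toy chunk: a [min, max) range over integers instead of BSON shard-key bounds
typedef std::pair<int,int> ToyChunk;

// returns true if 'chunks' exactly cover [minKey, maxKey) with no holes,
// mirroring the boundary and contiguity checks in mergeChunks() above
static bool coversRange( const std::vector<ToyChunk>& chunks , int minKey , int maxKey , std::string* why ) {
    if ( chunks.size() < 2 ) {
        *why = "need at least two chunks to merge";
        return false;
    }
    if ( chunks.front().first != minKey ) {
        *why = "range does not start at a chunk boundary";
        return false;
    }
    if ( chunks.back().second != maxKey ) {
        *why = "range does not end at a chunk boundary";
        return false;
    }
    for ( size_t i = 1; i < chunks.size(); ++i ) {
        if ( chunks[i - 1].second != chunks[i].first ) {
            *why = "hole in range";
            return false;
        }
    }
    return true;
}

int main() {
    std::string why;

    std::vector<ToyChunk> contiguous;          // [0,10) [10,20)
    contiguous.push_back( ToyChunk( 0 , 10 ) );
    contiguous.push_back( ToyChunk( 10 , 20 ) );
    std::cout << coversRange( contiguous , 0 , 20 , &why ) << std::endl;          // 1

    std::vector<ToyChunk> holey;               // [0,10) [12,20) -> hole at [10,12)
    holey.push_back( ToyChunk( 0 , 10 ) );
    holey.push_back( ToyChunk( 12 , 20 ) );
    std::cout << coversRange( holey , 0 , 20 , &why ) << " " << why << std::endl; // 0 hole in range
    return 0;
}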
Example #16
0
    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

        // Steps
        // 1. check basic config
        // 2. extract params from command
        // 3. fast check
        // 4. slow check (LOCKS)

        // step 1

        lastError.disableForCommand();
        ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

        // make sure we have the mongos id for writebacks
        if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) )
            return false;

        bool authoritative = cmdObj.getBoolField( "authoritative" );

        // check config server is ok or enable sharding
        if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
            return false;

        // check shard name/hosts are correct
        if ( cmdObj["shard"].type() == String ) {
            shardingState.gotShardName( cmdObj["shard"].String() );
            shardingState.gotShardHost( cmdObj["shardHost"].String() );
        }


        // Handle initial shard connection
        if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ) {
            result.append( "initialized", true );
            return true;
        }

        // we can run on a slave up to here
        if ( ! isMaster( "admin" ) ) {
            result.append( "errmsg" , "not master" );
            result.append( "note" , "from post init in setShardVersion" );
            return false;
        }

        // step 2

        string ns = cmdObj["setShardVersion"].valuestrsafe();
        if ( ns.size() == 0 ) {
            errmsg = "need to specify namespace";
            return false;
        }

        const ConfigVersion version = ConfigVersion( extractVersion( cmdObj["version"] , errmsg ), OID() );
        if ( errmsg.size() )
            return false;

        // step 3

        const ConfigVersion oldVersion = info->getVersion(ns);
        const ConfigVersion globalVersion = shardingState.getVersion(ns);

        oldVersion.addToBSON( result, "oldVersion" );

        if ( globalVersion.isSet() && version.isSet() ) {
            // this means there is no reset in progress on either side
            // so it's safe to make some assumptions

            if ( version.isEquivalentTo( globalVersion ) ) {
                // mongos and mongod agree!
                if ( ! oldVersion.isEquivalentTo( version ) ) {
                    if ( oldVersion < globalVersion ) {
                        info->setVersion( ns , version );
                    }
                    else if ( authoritative ) {
                        // this means there was a drop and our version is reset
                        info->setVersion( ns , version );
                    }
                    else {
                        result.append( "ns" , ns );
                        result.appendBool( "need_authoritative" , true );
                        errmsg = "verifying drop on '" + ns + "'";
                        return false;
                    }
                }
                return true;
            }

        }

        // step 4

        // this is because of a weird segfault I saw and I can't see why this should ever be set
        massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 );

        Lock::GlobalWrite setShardVersionLock; // TODO: can we get rid of this??

        if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
            // this had been reset
            info->setVersion( ns , ShardChunkVersion( 0, OID() ) );
        }

        if ( ! version.isSet() && ! globalVersion.isSet() ) {
            // this connection is cleaning itself
            info->setVersion( ns , ShardChunkVersion( 0, OID() ) );
            return true;
        }

        if ( ! version.isSet() && globalVersion.isSet() ) {
            if ( ! authoritative ) {
                result.appendBool( "need_authoritative" , true );
                result.append( "ns" , ns );
                globalVersion.addToBSON( result, "globalVersion" );
                errmsg = "dropping needs to be authoritative";
                return false;
            }
            log() << "wiping data for: " << ns << endl;
            globalVersion.addToBSON( result, "beforeDrop" );
            // only setting global version on purpose
            // need clients to re-find meta-data
            shardingState.resetVersion( ns );
            info->setVersion( ns , ShardChunkVersion( 0, OID() ) );
            return true;
        }

        if ( version < oldVersion ) {
            errmsg = "this connection already had a newer version of collection '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "newVersion" );
            globalVersion.addToBSON( result, "globalVersion" );
            return false;
        }

        if ( version < globalVersion ) {
            while ( shardingState.inCriticalMigrateSection() ) {
                dbtemprelease r;
                sleepmillis(2);
                OCCASIONALLY log() << "waiting till out of critical section" << endl;
            }
            errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "version" );
            globalVersion.addToBSON( result, "globalVersion" );
            result.appendBool( "reloadConfig" , true );
            return false;
        }

        if ( ! globalVersion.isSet() && ! authoritative ) {
            // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
            // should require a reload.
            // TODO: Maybe a more elegant way of doing this
            while ( shardingState.inCriticalMigrateSection() ) {
                dbtemprelease r;
                sleepmillis(2);
                OCCASIONALLY log() << "waiting till out of critical section for version reset" << endl;
            }

            // need authoritative for first look
            result.append( "ns" , ns );
            result.appendBool( "need_authoritative" , true );
            errmsg = "first time for collection '" + ns + "'";
            return false;
        }

        Timer relockTime;
        {
            dbtemprelease unlock;

            ShardChunkVersion currVersion = version;
            if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                errmsg = str::stream() << "client version differs from config's for collection '" << ns << "'";
                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                globalVersion.addToBSON( result, "globalVersion" );
                return false;
            }
        }
        if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) {
            log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl;
        }

        info->setVersion( ns , version );
        return true;
    }
Example #17
0
namespace mongo {

// -----ShardingState START ----

ShardingState::ShardingState()
    : _enabled(false) , _mutex( "ShardingState" ),
      _configServerTickets( 3 /* max number of concurrent config server refresh threads */ ) {
}

void ShardingState::enable( const string& server ) {
    _enabled = true;
    verify( server.size() );
    if ( _configServer.size() == 0 )
        _configServer = server;
    else {
        verify( server == _configServer );
    }
}

void ShardingState::gotShardName( const string& name ) {
    scoped_lock lk(_mutex);
    if ( _shardName.size() == 0 ) {
        // TODO SERVER-2299 verify the name is sound w.r.t IPs
        _shardName = name;
        return;
    }

    if ( _shardName == name )
        return;

    stringstream ss;
    ss << "gotShardName different than what i had before "
       << " before [" << _shardName << "] "
       << " got [" << name << "] "
       ;
    msgasserted( 13298 , ss.str() );
}

void ShardingState::gotShardHost( string host ) {
    scoped_lock lk(_mutex);
    size_t slash = host.find( '/' );
    if ( slash != string::npos )
        host = host.substr( 0 , slash );

    if ( _shardHost.size() == 0 ) {
        _shardHost = host;
        return;
    }

    if ( _shardHost == host )
        return;

    stringstream ss;
    ss << "gotShardHost different than what i had before "
       << " before [" << _shardHost << "] "
       << " got [" << host << "] "
       ;
    msgasserted( 13299 , ss.str() );
}

void ShardingState::resetShardingState() {
    scoped_lock lk(_mutex);

    _enabled = false;
    _configServer.clear();
    _shardName.clear();
    _shardHost.clear();
    _chunks.clear();
}

// TODO we shouldn't need three ways for checking the version. Fix this.
bool ShardingState::hasVersion( const string& ns ) {
    scoped_lock lk(_mutex);

    ChunkManagersMap::const_iterator it = _chunks.find(ns);
    return it != _chunks.end();
}

bool ShardingState::hasVersion( const string& ns , ConfigVersion& version ) {
    scoped_lock lk(_mutex);

    ChunkManagersMap::const_iterator it = _chunks.find(ns);
    if ( it == _chunks.end() )
        return false;

    ShardChunkManagerPtr p = it->second;
    version = p->getVersion();
    return true;
}

const ConfigVersion ShardingState::getVersion( const string& ns ) const {
    scoped_lock lk(_mutex);

    ChunkManagersMap::const_iterator it = _chunks.find( ns );
    if ( it != _chunks.end() ) {
        ShardChunkManagerPtr p = it->second;
        return p->getVersion();
    }
    else {
        return ConfigVersion( 0, OID() );
    }
}

void ShardingState::donateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ) {
    scoped_lock lk( _mutex );

    ChunkManagersMap::const_iterator it = _chunks.find( ns );
    verify( it != _chunks.end() ) ;
    ShardChunkManagerPtr p = it->second;

    // empty shards should have version 0
    version = ( p->getNumChunks() > 1 ) ? version : ShardChunkVersion( 0 , OID() );

    ShardChunkManagerPtr cloned( p->cloneMinus( min , max , version ) );
    _chunks[ns] = cloned;
}

void ShardingState::undoDonateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ) {
    scoped_lock lk( _mutex );

    ChunkManagersMap::const_iterator it = _chunks.find( ns );
    verify( it != _chunks.end() ) ;
    ShardChunkManagerPtr p( it->second->clonePlus( min , max , version ) );
    _chunks[ns] = p;
}

void ShardingState::splitChunk( const string& ns , const BSONObj& min , const BSONObj& max , const vector<BSONObj>& splitKeys ,
                                ShardChunkVersion version ) {
    scoped_lock lk( _mutex );

    ChunkManagersMap::const_iterator it = _chunks.find( ns );
    verify( it != _chunks.end() ) ;
    ShardChunkManagerPtr p( it->second->cloneSplit( min , max , splitKeys , version ) );
    _chunks[ns] = p;
}

void ShardingState::resetVersion( const string& ns ) {
    scoped_lock lk( _mutex );

    _chunks.erase( ns );
}

bool ShardingState::trySetVersion( const string& ns , ConfigVersion& version /* IN-OUT */ ) {

    // Currently this function is called after a getVersion(), which is the first "check", and the assumption here
    // is that we don't do anything nearly as long as a remote query in a thread between then and now.
    // Otherwise it may be worth adding an additional check without the _configServerMutex below, since then it
    // would be likely that the version may have changed in the meantime without waiting for or fetching config results.

    // TODO:  Mutex-per-namespace?

    LOG( 2 ) << "trying to set shard version of " << version.toString() << " for '" << ns << "'" << endl;

    _configServerTickets.waitForTicket();
    TicketHolderReleaser needTicketFrom( &_configServerTickets );

    // fast path - double-check if requested version is at the same version as this chunk manager before verifying
    // against config server
    //
    // This path will short-circuit the version set if another thread already managed to update the version in the
    // meantime.  First check is from getVersion().
    //
    // cases:
    //   + this shard updated the version for a migrate's commit (FROM side)
    //     a client reloaded chunk state from config and picked the newest version
    //   + two clients reloaded
    //     one triggered the 'slow path' (below)
    //     when the second's request gets here, the version is already current
    ConfigVersion storedVersion;
    ShardChunkManagerPtr currManager;
    {
        scoped_lock lk( _mutex );
        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        if ( it != _chunks.end() ) currManager = it->second;
        if ( it != _chunks.end() && ( storedVersion = it->second->getVersion() ).isEquivalentTo( version ) )
            return true;
    }

    LOG( 2 ) << "verifying cached version " << storedVersion.toString() << " and new version " << version.toString() << " for '" << ns << "'" << endl;

    // slow path - requested version is different than the current chunk manager's, if one exists, so must check for
    // newest version in the config server
    //
    // cases:
    //   + a chunk moved TO here
    //     (we don't bump up the version on the TO side but the commit to config does use higher version)
    //     a client reloads from config and issues the request
    //   + there was a take over from a secondary
    //     the secondary had no state (managers) at all, so every client request will fall here
    //   + a stale client requests a version that's not current anymore

    // Can't lock default mutex while creating ShardChunkManager, b/c may have to create a new connection to myself
    const string c = (_configServer == _shardHost) ? "" /* local */ : _configServer;
    ShardChunkManagerPtr p( new ShardChunkManager( c , ns , _shardName, currManager ) );

    {
        scoped_lock lk( _mutex );

        // since we loaded the chunk manager unlocked, other thread may have done the same
        // make sure we keep the freshest config info only
        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        if ( it == _chunks.end() || p->getVersion() >= it->second->getVersion() ) {
            _chunks[ns] = p;
        }

        ShardChunkVersion oldVersion = version;
        version = p->getVersion();
        return oldVersion.isEquivalentTo( version );
    }
}

void ShardingState::appendInfo( BSONObjBuilder& b ) {
    b.appendBool( "enabled" , _enabled );
    if ( ! _enabled )
        return;

    b.append( "configServer" , _configServer );
    b.append( "shardName" , _shardName );
    b.append( "shardHost" , _shardHost );

    {
        BSONObjBuilder bb( b.subobjStart( "versions" ) );

        scoped_lock lk(_mutex);

        for ( ChunkManagersMap::iterator it = _chunks.begin(); it != _chunks.end(); ++it ) {
            ShardChunkManagerPtr p = it->second;
            bb.appendTimestamp( it->first , p->getVersion().toLong() );
        }
        bb.done();
    }

}

bool ShardingState::needShardChunkManager( const string& ns ) const {
    if ( ! _enabled )
        return false;

    if ( ! ShardedConnectionInfo::get( false ) )
        return false;

    return true;
}

ShardChunkManagerPtr ShardingState::getShardChunkManager( const string& ns ) {
    scoped_lock lk( _mutex );

    ChunkManagersMap::const_iterator it = _chunks.find( ns );
    if ( it == _chunks.end() ) {
        return ShardChunkManagerPtr();
    }
    else {
        return it->second;
    }
}

ShardingState shardingState;

// -----ShardingState END ----

// -----ShardedConnectionInfo START ----

boost::thread_specific_ptr<ShardedConnectionInfo> ShardedConnectionInfo::_tl;

ShardedConnectionInfo::ShardedConnectionInfo() {
    _forceVersionOk = false;
    _id.clear();
}

ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ) {
    ShardedConnectionInfo* info = _tl.get();
    if ( ! info && create ) {
        LOG(1) << "entering shard mode for connection" << endl;
        info = new ShardedConnectionInfo();
        _tl.reset( info );
    }
    return info;
}

void ShardedConnectionInfo::reset() {
    _tl.reset();
}

const ConfigVersion ShardedConnectionInfo::getVersion( const string& ns ) const {
    NSVersionMap::const_iterator it = _versions.find( ns );
    if ( it != _versions.end() ) {
        return it->second;
    }
    else {
        return ConfigVersion( 0, OID() );
    }
}

void ShardedConnectionInfo::setVersion( const string& ns , const ConfigVersion& version ) {
    _versions[ns] = version;
}

void ShardedConnectionInfo::addHook() {
    static bool done = false;
    if (!done) {
        LOG(1) << "adding sharding hook" << endl;
        pool.addHook(new ShardingConnectionHook(false));
        shardConnectionPool.addHook(new ShardingConnectionHook(true));
        done = true;
    }
}

void ShardedConnectionInfo::setID( const OID& id ) {
    _id = id;
}

// -----ShardedConnectionInfo END ----

unsigned long long extractVersion( BSONElement e , string& errmsg ) {
    if ( e.eoo() ) {
        errmsg = "no version";
        return 0;
    }

    if ( e.isNumber() )
        return (unsigned long long)e.number();

    if ( e.type() == Date || e.type() == Timestamp )
        return e._numberLong();


    errmsg = "version is not a numeric type";
    return 0;
}

class MongodShardCommand : public Command {
public:
    MongodShardCommand( const char * n ) : Command( n ) {
    }
    virtual bool slaveOk() const {
        return false;
    }
    virtual bool adminOnly() const {
        return true;
    }
};


bool haveLocalShardingInfo( const string& ns ) {
    if ( ! shardingState.enabled() )
        return false;

    if ( ! shardingState.hasVersion( ns ) )
        return false;

    return ShardedConnectionInfo::get(false) != 0;
}

class UnsetShardingCommand : public MongodShardCommand {
public:
    UnsetShardingCommand() : MongodShardCommand("unsetSharding") {}

    virtual void help( stringstream& help ) const {
        help << " example: { unsetSharding : 1 } ";
    }

    virtual LockType locktype() const {
        return NONE;
    }

    virtual bool slaveOk() const {
        return true;
    }

    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
        ShardedConnectionInfo::reset();
        return true;
    }

} unsetShardingCommand;

class SetShardVersion : public MongodShardCommand {
public:
    SetShardVersion() : MongodShardCommand("setShardVersion") {}

    virtual void help( stringstream& help ) const {
        help << " example: { setShardVersion : 'alleyinsider.foo' , version : 1 , configdb : '' } ";
    }

    virtual bool slaveOk() const {
        return true;
    }
    virtual LockType locktype() const {
        return NONE;
    }

    bool checkConfigOrInit( const string& configdb , bool authoritative , string& errmsg , BSONObjBuilder& result , bool locked=false ) const {
        if ( configdb.size() == 0 ) {
            errmsg = "no configdb";
            return false;
        }

        if ( shardingState.enabled() ) {
            if ( configdb == shardingState.getConfigServer() )
                return true;

            result.append( "configdb" , BSON( "stored" << shardingState.getConfigServer() <<
                                              "given" << configdb ) );

            errmsg = str::stream() << "mongos specified a different config database string : "
                     << "stored : " << shardingState.getConfigServer()
                     << " vs given : " << configdb;
            return false;
        }

        if ( ! authoritative ) {
            result.appendBool( "need_authoritative" , true );
            errmsg = "first setShardVersion";
            return false;
        }

        if ( locked ) {
            ShardedConnectionInfo::addHook();
            shardingState.enable( configdb );
            configServer.init( configdb );
            return true;
        }

        Lock::GlobalWrite lk;
        return checkConfigOrInit( configdb , authoritative , errmsg , result , true );
    }

    bool checkMongosID( ShardedConnectionInfo* info, const BSONElement& id, string& errmsg ) {
        if ( id.type() != jstOID ) {
            if ( ! info->hasID() ) {
                warning() << "bad serverID set in setShardVersion and none in info: " << id << endl;
            }
            // TODO: fix this
            //errmsg = "need serverID to be an OID";
            //return 0;
            return true;
        }

        OID clientId = id.__oid();
        if ( ! info->hasID() ) {
            info->setID( clientId );
            return true;
        }

        if ( clientId != info->getID() ) {
            errmsg = "server id has changed!";
            return false;
        }

        return true;
    }

    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

        // Steps
        // 1. check basic config
        // 2. extract params from command
        // 3. fast check
        // 4. slow check (LOCKS)

        // step 1

        lastError.disableForCommand();
        ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

        // make sure we have the mongos id for writebacks
        if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) )
            return false;

        bool authoritative = cmdObj.getBoolField( "authoritative" );

        // check config server is ok or enable sharding
        if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
            return false;

        // check shard name/hosts are correct
        if ( cmdObj["shard"].type() == String ) {
            shardingState.gotShardName( cmdObj["shard"].String() );
            shardingState.gotShardHost( cmdObj["shardHost"].String() );
        }


        // Handle initial shard connection
        if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ) {
            result.append( "initialized", true );
            return true;
        }

        // we can run on a slave up to here
        if ( ! isMaster( "admin" ) ) {
            result.append( "errmsg" , "not master" );
            result.append( "note" , "from post init in setShardVersion" );
            return false;
        }

        // step 2

        string ns = cmdObj["setShardVersion"].valuestrsafe();
        if ( ns.size() == 0 ) {
            errmsg = "need to specify namespace";
            return false;
        }

        const ConfigVersion version = ConfigVersion( extractVersion( cmdObj["version"] , errmsg ), OID() );
        if ( errmsg.size() )
            return false;

        // step 3

        const ConfigVersion oldVersion = info->getVersion(ns);
        const ConfigVersion globalVersion = shardingState.getVersion(ns);

        oldVersion.addToBSON( result, "oldVersion" );

        if ( globalVersion.isSet() && version.isSet() ) {
            // this means there is no reset in progress on either side
            // so it's safe to make some assumptions

            if ( version.isEquivalentTo( globalVersion ) ) {
                // mongos and mongod agree!
                if ( ! oldVersion.isEquivalentTo( version ) ) {
                    if ( oldVersion < globalVersion ) {
                        info->setVersion( ns , version );
                    }
                    else if ( authoritative ) {
                        // this means there was a drop and our version is reset
                        info->setVersion( ns , version );
                    }
                    else {
                        result.append( "ns" , ns );
                        result.appendBool( "need_authoritative" , true );
                        errmsg = "verifying drop on '" + ns + "'";
                        return false;
                    }
                }
                return true;
            }

        }

        // step 4

        // this is because of a weird segfault I saw and I can't see why this should ever be set
        massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 );

        Lock::GlobalWrite setShardVersionLock; // TODO: can we get rid of this??

        if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
            // this had been reset
            info->setVersion( ns , ShardChunkVersion( 0, OID() ) );
        }

        if ( ! version.isSet() && ! globalVersion.isSet() ) {
            // this connection is cleaning itself
            info->setVersion( ns , ShardChunkVersion( 0, OID() ) );
            return true;
        }

        if ( ! version.isSet() && globalVersion.isSet() ) {
            if ( ! authoritative ) {
                result.appendBool( "need_authoritative" , true );
                result.append( "ns" , ns );
                globalVersion.addToBSON( result, "globalVersion" );
                errmsg = "dropping needs to be authoritative";
                return false;
            }
            log() << "wiping data for: " << ns << endl;
            globalVersion.addToBSON( result, "beforeDrop" );
            // only setting global version on purpose
            // need clients to re-find meta-data
            shardingState.resetVersion( ns );
            info->setVersion( ns , ShardChunkVersion( 0, OID() ) );
            return true;
        }

        if ( version < oldVersion ) {
            errmsg = "this connection already had a newer version of collection '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "newVersion" );
            globalVersion.addToBSON( result, "globalVersion" );
            return false;
        }

        if ( version < globalVersion ) {
            while ( shardingState.inCriticalMigrateSection() ) {
                dbtemprelease r;
                sleepmillis(2);
                OCCASIONALLY log() << "waiting till out of critical section" << endl;
            }
            errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "version" );
            globalVersion.addToBSON( result, "globalVersion" );
            result.appendBool( "reloadConfig" , true );
            return false;
        }

        if ( ! globalVersion.isSet() && ! authoritative ) {
            // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
            // should require a reload.
            // TODO: Maybe a more elegant way of doing this
            while ( shardingState.inCriticalMigrateSection() ) {
                dbtemprelease r;
                sleepmillis(2);
                OCCASIONALLY log() << "waiting till out of critical section for version reset" << endl;
            }

            // need authoritative for first look
            result.append( "ns" , ns );
            result.appendBool( "need_authoritative" , true );
            errmsg = "first time for collection '" + ns + "'";
            return false;
        }

        Timer relockTime;
        {
            dbtemprelease unlock;

            ShardChunkVersion currVersion = version;
            if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                errmsg = str::stream() << "client version differs from config's for collection '" << ns << "'";
                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                globalVersion.addToBSON( result, "globalVersion" );
                return false;
            }
        }
        if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) {
            log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl;
        }

        info->setVersion( ns , version );
        return true;
    }

} setShardVersionCmd;

class GetShardVersion : public MongodShardCommand {
public:
    GetShardVersion() : MongodShardCommand("getShardVersion") {}

    virtual void help( stringstream& help ) const {
        help << " example: { getShardVersion : 'alleyinsider.foo'  } ";
    }

    virtual LockType locktype() const {
        return NONE;
    }

    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
        string ns = cmdObj["getShardVersion"].valuestrsafe();
        if ( ns.size() == 0 ) {
            errmsg = "need to specify full namespace";
            return false;
        }

        result.append( "configServer" , shardingState.getConfigServer() );

        result.appendTimestamp( "global" , shardingState.getVersion(ns).toLong() );

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );
        result.appendBool( "inShardedMode" , info != 0 );
        if ( info )
            result.appendTimestamp( "mine" , info->getVersion(ns).toLong() );
        else
            result.appendTimestamp( "mine" , 0 );

        return true;
    }

} getShardVersion;

class ShardingStateCmd : public MongodShardCommand {
public:
    ShardingStateCmd() : MongodShardCommand( "shardingState" ) {}

    virtual LockType locktype() const {
        return WRITE;    // TODO: figure out how to make this not need to lock
    }

    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
        shardingState.appendInfo( result );
        return true;
    }

} shardingStateCmd;

/**
 * @return true if not in sharded mode
 *         or if the version for this client is ok
 */
bool shardVersionOk( const string& ns , string& errmsg, ConfigVersion& received, ConfigVersion& wanted ) {
    if ( ! shardingState.enabled() )
        return true;

    if ( ! isMasterNs( ns.c_str() ) )  {
        // right now connections to secondaries aren't versioned at all
        return true;
    }

    ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

    if ( ! info ) {
        // this means the client has nothing sharded
        // so this allows direct connections to do whatever they want
        // which I think is the correct behavior
        return true;
    }

    if ( info->inForceVersionOkMode() ) {
        return true;
    }

    // TODO
    //   at some point all collections, sharded or not, will have a version (and a ShardChunkManager)
    //   for now, we remove the sharding state of a dropped collection
    //   so delayed requests may come in. This has to be fixed.
    ConfigVersion clientVersion = info->getVersion(ns);
    ConfigVersion version;
    if ( ! shardingState.hasVersion( ns , version ) && ! clientVersion.isSet() ) {
        return true;
    }

    // The versions we're going to compare, saved for future use
    received = clientVersion;
    wanted = version;

    if ( ! version.isSet() && clientVersion.isSet() ) {
        stringstream ss;
        ss << "collection was dropped or this shard no longer valid version";
        errmsg = ss.str();
        return false;
    }

    if ( clientVersion >= version )
        return true;


    if ( ! clientVersion.isSet() ) {
        stringstream ss;
        ss << "client in sharded mode, but doesn't have version set for this collection";
        errmsg = ss.str();
        return false;
    }

    if ( version.majorVersion() == clientVersion.majorVersion() ) {
        // this means there was just a split
        // on a split without a migrate this server is still ok
        // so accept the request
        return true;
    }

    stringstream ss;
    ss << "your version is too old";
    errmsg = ss.str();
    return false;
}

void ShardingConnectionHook::onHandedOut( DBClientBase * conn ) {
    // no-op for mongod
}
}
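trySetVersion() in the example above follows a double-checked refresh pattern: read the cached version under the mutex, drop the mutex for the (potentially slow) reload from the config server, then re-take the mutex and install the result only if it is at least as fresh as anything another thread installed in the meantime. Below is a minimal standalone sketch of that pattern; std::mutex, a std::map, and plain ints stand in for the real lock, chunk-manager map, and versions, and the config-server fetch is a stub.

#include <iostream>
#include <map>
#include <mutex>
#include <string>

// toy double-checked refresh, mirroring the structure of trySetVersion() above
class ToyShardingState {
public:
    // returns true if the cached version for 'ns' ends up equal to 'wanted'
    bool trySetVersion( const std::string& ns , int wanted ) {
        {
            // fast path: another thread may already have installed the wanted version
            std::lock_guard<std::mutex> lk( _mutex );
            std::map<std::string,int>::const_iterator it = _versions.find( ns );
            if ( it != _versions.end() && it->second == wanted )
                return true;
        }

        // slow path: reload without holding the lock (this is the expensive remote step)
        int loaded = loadVersionFromConfig( ns );

        std::lock_guard<std::mutex> lk( _mutex );
        std::map<std::string,int>::iterator it = _versions.find( ns );
        if ( it == _versions.end() || loaded >= it->second )   // keep only the freshest config info
            _versions[ns] = loaded;
        return _versions[ns] == wanted;
    }

private:
    // stub for the config-server fetch done by building a new ShardChunkManager
    int loadVersionFromConfig( const std::string& ) { return 7; }

    std::mutex _mutex;
    std::map<std::string,int> _versions;
};

int main() {
    ToyShardingState state;
    std::cout << state.trySetVersion( "test.foo" , 7 ) << std::endl;   // 1: reload returned the wanted version
    std::cout << state.trySetVersion( "test.foo" , 5 ) << std::endl;   // 0: cache is already newer than wanted
    return 0;
}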
Example #18
0
        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }
            
            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) )
                return false;

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to speciy namespace";
                return false;
            }

            const ConfigVersion version = extractVersion( cmdObj["version"] , errmsg );
            if ( errmsg.size() )
                return false;
            
            // step 3

            const ConfigVersion oldVersion = info->getVersion(ns);
            const ConfigVersion globalVersion = shardingState.getVersion(ns);

            result.appendTimestamp( "oldVersion" , oldVersion );
            
            if ( globalVersion > 0 && version > 0 ) {
                // this means there is no reset in progress on either side
                // so it's safe to make some assumptions

                if ( version == globalVersion ) {
                    // mongos and mongod agree!
                    if ( oldVersion != version ) {
                        assert( oldVersion < globalVersion );
                        info->setVersion( ns , version );
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            dblock setShardVersionLock; // TODO: can we get rid of this??
            
            if ( oldVersion > 0 && globalVersion == 0 ) {
                // this had been reset
                info->setVersion( ns , 0 );
            }

            if ( version == 0 && globalVersion == 0 ) {
                // this connection is cleaning itself
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version == 0 && globalVersion > 0 ) {
                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    result.appendTimestamp( "globalVersion" , globalVersion );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }
                log() << "wiping data for: " << ns << endl;
                result.appendTimestamp( "beforeDrop" , globalVersion );
                // only setting global version on purpose
                // need clients to re-find meta-data
                shardingState.resetVersion( ns );
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version < oldVersion ) {
                errmsg = "this connection already had a newer version of collection '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "newVersion" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }

            if ( version < globalVersion ) {
                while ( shardingState.inCriticalMigrateSection() ) {
                    dbtemprelease r;
                    sleepmillis(2);
                    OCCASIONALLY log() << "waiting till out of critical section" << endl;
                }
                errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "version" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                result.appendBool( "reloadConfig" , true );
                return false;
            }

            if ( globalVersion == 0 && ! authoritative ) {
                // need authoritative for first look
                result.append( "ns" , ns );
                result.appendBool( "need_authoritative" , true );
                errmsg = "first time for collection '" + ns + "'";
                return false;
            }

            Timer relockTime;
            {
                dbtemprelease unlock;

                ShardChunkVersion currVersion = version;
                if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                    errmsg = str::stream() << "client version differs from config's for colleciton '" << ns << "'";
                    result.append( "ns" , ns );
                    result.appendTimestamp( "version" , version );
                    result.appendTimestamp( "globalVersion" , currVersion );
                    return false;
                }
            }
            if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) {
                log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl;
            }
            
            info->setVersion( ns , version );
            return true;
        }
Example #19
0
        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) ) 
                return false;

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
            }
            
            // Handle initial shard connection
            if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){

                result.append( "initialized", true );

                // Send back wire version to let mongos know what protocol we can speak
                result.append( "minWireVersion", minWireVersion );
                result.append( "maxWireVersion", maxWireVersion );

                return true;
            }

            // we can run on a slave up to here
            if ( ! isMaster( "admin" ) ) {
                result.append( "errmsg" , "not master" );
                result.append( "note" , "from post init in setShardVersion" );
                return false;
            }

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify namespace";
                return false;
            }

            if( ! ChunkVersion::canParseBSON( cmdObj, "version" ) ){
                errmsg = "need to specify version";
                return false;
            }

            const ChunkVersion version = ChunkVersion::fromBSON( cmdObj, "version" );
            
            // step 3

            const ChunkVersion oldVersion = info->getVersion(ns);
            const ChunkVersion globalVersion = shardingState.getVersion(ns);

            oldVersion.addToBSON( result, "oldVersion" );
            
            if ( globalVersion.isSet() && version.isSet() ) {
                // this means there is no reset in progress on either side
                // so it's safe to make some assumptions

                if ( version.isWriteCompatibleWith( globalVersion ) ) {
                    // mongos and mongod agree!
                    if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                        if ( oldVersion < globalVersion &&
                             oldVersion.hasCompatibleEpoch(globalVersion) )
                        {
                            info->setVersion( ns , version );
                        }
                        else if ( authoritative ) {
                            // this means there was a drop and our version is reset
                            info->setVersion( ns , version );
                        }
                        else {
                            result.append( "ns" , ns );
                            result.appendBool( "need_authoritative" , true );
                            errmsg = "verifying drop on '" + ns + "'";
                            return false;
                        }
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
                // this had been reset
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
            }

            if ( ! version.isSet() && ! globalVersion.isSet() ) {
                // this connection is cleaning itself
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            // Cases below all either return OR fall-through to remote metadata reload.
            if ( version.isSet() || !globalVersion.isSet() ) {

                // Not Dropping

                // TODO: Refactor all of this
                if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
                    errmsg = "this connection already had a newer version of collection '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "newVersion" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    return false;
                }

                // TODO: Refactor all of this
                if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }
                    errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "version" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    result.appendBool( "reloadConfig" , true );
                    return false;
                }

                if ( ! globalVersion.isSet() && ! authoritative ) {
                    // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
                    // should require a reload.
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }

                    // need authoritative for first look
                    result.append( "ns" , ns );
                    result.appendBool( "need_authoritative" , true );
                    errmsg = "first time for collection '" + ns + "'";
                    return false;
                }

                // Fall through to metadata reload below
            }
            else {

                // Dropping

                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    globalVersion.addToBSON( result, "globalVersion" );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }

                // Fall through to metadata reload below
            }

            ChunkVersion currVersion;
            Status status = shardingState.refreshMetadataIfNeeded( ns, version, &currVersion );

            if (!status.isOK()) {

                // The reload itself was interrupted or confused here

                errmsg = str::stream() << "could not refresh metadata for " << ns
                                       << " with requested shard version " << version.toString()
                                       << ", stored shard version is " << currVersion.toString()
                                       << causedBy( status.reason() );

                warning() << errmsg << endl;

                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                currVersion.addToBSON( result, "globalVersion" );
                result.appendBool( "reloadConfig", true );

                return false;
            }
            else if ( !version.isWriteCompatibleWith( currVersion ) ) {

                // We reloaded a version that doesn't match the version mongos was trying to
                // set.

                errmsg = str::stream() << "requested shard version differs from"
                                       << " config shard version for " << ns
                                       << ", requested version is " << version.toString()
                                       << " but found version " << currVersion.toString();

                OCCASIONALLY warning() << errmsg << endl;

                // WARNING: the exact fields below are important for compatibility with mongos
                // version reload.

                result.append( "ns" , ns );
                currVersion.addToBSON( result, "globalVersion" );

                // If this was a reset of a collection or the last chunk moved out, inform mongos to
                // do a full reload.
                if (currVersion.epoch() != version.epoch() || !currVersion.isSet() ) {
                    result.appendBool( "reloadConfig", true );
                    // Zero-version also needed to trigger full mongos reload, sadly
                    // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
                    ChunkVersion( 0, 0, OID() ).addToBSON( result, "version" );
                    // For debugging
                    version.addToBSON( result, "origVersion" );
                }
                else {
                    version.addToBSON( result, "version" );
                }

                return false;
            }

            info->setVersion( ns , version );
            return true;
        }
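
The handler above signals its retry expectations to the router through the "need_authoritative" and "reloadConfig" reply fields rather than through the error message alone. Below is a minimal sketch, assuming a hypothetical router-side caller, of how those fields could be interpreted; runSetShardVersion() and reloadChunkManager() are illustrative helpers, not actual mongos APIs.

    // Hedged sketch: reacting to the setShardVersion reply fields produced above.
    // runSetShardVersion() and reloadChunkManager() are hypothetical helpers.
    BSONObj runSetShardVersion( const string& ns, bool authoritative ); // assumed helper
    void reloadChunkManager( const string& ns );                        // assumed helper

    bool trySetShardVersion( const string& ns, int maxAttempts = 3 ) {
        bool authoritative = false;
        for ( int attempt = 0; attempt < maxAttempts; ++attempt ) {
            BSONObj res = runSetShardVersion( ns, authoritative );
            if ( res["ok"].trueValue() )
                return true;                        // shard accepted the requested version
            if ( res["need_authoritative"].trueValue() )
                authoritative = true;               // resend, this time authoritatively
            if ( res["reloadConfig"].trueValue() )
                reloadChunkManager( ns );           // refresh routing info before retrying
            if ( ! res["need_authoritative"].trueValue() &&
                 ! res["reloadConfig"].trueValue() )
                return false;                       // hard failure; surface errmsg to caller
        }
        return false;
    }
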
Example #20
0
namespace mongo {

    // -----ShardingState START ----

    ShardingState::ShardingState()
        : _enabled(false) , _mutex( "ShardingState" ),
          _configServerTickets( 3 /* max number of concurrent config server refresh threads */ ) {
    }

    void ShardingState::enable( const string& server ) {
        scoped_lock lk(_mutex);

        _enabled = true;
        verify( server.size() );
        if ( _configServer.size() == 0 )
            _configServer = server;
        else {
            verify( server == _configServer );
        }
    }

    void ShardingState::initialize(const string& server) {
        ShardedConnectionInfo::addHook();
        shardingState.enable(server);
        configServer.init(server);
    }

    bool ShardingState::setShardName( const string& name ) {
        scoped_lock lk(_mutex);
        if ( _shardName.size() == 0 ) {
            // TODO SERVER-2299 remotely verify the name is sound w.r.t IPs
            _shardName = name;

            string clientAddr = cc().clientAddress(true);
            log() << "remote client " << clientAddr << " initialized this host as shard " << name;
            return true;
        }

        if ( _shardName == name )
            return true;

        string clientAddr = cc().clientAddress(true);
        warning() << "remote client " << clientAddr << " tried to initialize this host as shard "
                  << name << ", but shard name was previously initialized as " << _shardName;

        return false;
    }

    void ShardingState::gotShardName( const string& name ) {
        if ( setShardName( name ) )
            return;

        string clientAddr = cc().clientAddress(true);
        stringstream ss;

        // Same message as the warning above, so the two reports match
        ss << "remote client " << clientAddr << " tried to initialize this host as shard " << name
           << ", but shard name was previously initialized as " << _shardName;
        msgasserted( 13298 , ss.str() );
    }

    void ShardingState::resetShardingState() {
        scoped_lock lk(_mutex);
        
        _enabled = false;
        _configServer.clear();
        _shardName.clear();
        _collMetadata.clear();
    }

    // TODO we shouldn't need three ways for checking the version. Fix this.
    bool ShardingState::hasVersion( const string& ns ) {
        scoped_lock lk(_mutex);

        CollectionMetadataMap::const_iterator it = _collMetadata.find(ns);
        return it != _collMetadata.end();
    }

    bool ShardingState::hasVersion( const string& ns , ChunkVersion& version ) {
        scoped_lock lk(_mutex);

        CollectionMetadataMap::const_iterator it = _collMetadata.find(ns);
        if ( it == _collMetadata.end() )
            return false;

        CollectionMetadataPtr p = it->second;
        version = p->getShardVersion();
        return true;
    }

    const ChunkVersion ShardingState::getVersion( const string& ns ) const {
        scoped_lock lk(_mutex);

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        if ( it != _collMetadata.end() ) {
            CollectionMetadataPtr p = it->second;
            return p->getShardVersion();
        }
        else {
            return ChunkVersion( 0, OID() );
        }
    }

    // Installs metadata that no longer includes [min,max), after that chunk has been migrated away.
    void ShardingState::donateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ChunkVersion version ) {
        scoped_lock lk( _mutex );

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        verify( it != _collMetadata.end() ) ;
        CollectionMetadataPtr p = it->second;

        // empty shards should have version 0
        version =
                ( p->getNumChunks() > 1 ) ?
                        version : ChunkVersion( 0, 0, p->getCollVersion().epoch() );

        ChunkType chunk;
        chunk.setMin( min );
        chunk.setMax( max );
        string errMsg;

        CollectionMetadataPtr cloned( p->cloneMigrate( chunk, version, &errMsg ) );
        // uassert to match old behavior, TODO: report errors w/o throwing
        uassert( 16855, errMsg, NULL != cloned.get() );

        // TODO: a bit dangerous to have two different zero-version states - no-metadata and
        // no-version
        _collMetadata[ns] = cloned;
    }

    void ShardingState::undoDonateChunk( const string& ns, CollectionMetadataPtr prevMetadata ) {
        scoped_lock lk( _mutex );
        log() << "ShardingState::undoDonateChunk acquired _mutex" << endl;

        CollectionMetadataMap::iterator it = _collMetadata.find( ns );
        verify( it != _collMetadata.end() );
        it->second = prevMetadata;
    }

    // Records [min,max) as a pending (incoming) chunk in the cached metadata,
    // provided the collection epoch still matches.
    bool ShardingState::notePending( const string& ns,
                                     const BSONObj& min,
                                     const BSONObj& max,
                                     const OID& epoch,
                                     string* errMsg ) {
        scoped_lock lk( _mutex );

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        if ( it == _collMetadata.end() ) {

            *errMsg = str::stream() << "could not note chunk " << "[" << min << "," << max << ")"
                                    << " as pending because the local metadata for " << ns
                                    << " has changed";

            return false;
        }

        CollectionMetadataPtr metadata = it->second;

        // This can currently happen because drops aren't synchronized with in-migrations.
        // Checking the epoch here guards against that until they are.
        if ( metadata->getCollVersion().epoch() != epoch ) {

            *errMsg = str::stream() << "could not note chunk " << "[" << min << "," << max << ")"
                                    << " as pending because the epoch for " << ns
                                    << " has changed from "
                                    << epoch << " to " << metadata->getCollVersion().epoch();

            return false;
        }

        ChunkType chunk;
        chunk.setMin( min );
        chunk.setMax( max );

        CollectionMetadataPtr cloned( metadata->clonePlusPending( chunk, errMsg ) );
        if ( !cloned ) return false;

        _collMetadata[ns] = cloned;
        return true;
    }

    // Removes a previously noted pending chunk from the cached metadata,
    // provided the collection epoch still matches.
    bool ShardingState::forgetPending( const string& ns,
                                       const BSONObj& min,
                                       const BSONObj& max,
                                       const OID& epoch,
                                       string* errMsg ) {
        scoped_lock lk( _mutex );

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        if ( it == _collMetadata.end() ) {

            *errMsg = str::stream() << "no need to forget pending chunk "
                                    << "[" << min << "," << max << ")"
                                    << " because the local metadata for " << ns << " has changed";

            return false;
        }

        CollectionMetadataPtr metadata = it->second;

        // This can currently happen because drops aren't synchronized with in-migrations.
        // Checking the epoch here guards against that until they are.
        if ( metadata->getCollVersion().epoch() != epoch ) {

            *errMsg = str::stream() << "no need to forget pending chunk "
                                    << "[" << min << "," << max << ")"
                                    << " because the epoch for " << ns << " has changed from "
                                    << epoch << " to " << metadata->getCollVersion().epoch();

            return false;
        }

        ChunkType chunk;
        chunk.setMin( min );
        chunk.setMax( max );

        CollectionMetadataPtr cloned( metadata->cloneMinusPending( chunk, errMsg ) );
        if ( !cloned ) return false;

        _collMetadata[ns] = cloned;
        return true;
    }

    // Installs metadata reflecting the chunk [min,max) having been split at splitKeys.
    void ShardingState::splitChunk( const string& ns,
                                    const BSONObj& min,
                                    const BSONObj& max,
                                    const vector<BSONObj>& splitKeys,
                                    ChunkVersion version )
    {
        scoped_lock lk( _mutex );

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        verify( it != _collMetadata.end() ) ;

        ChunkType chunk;
        chunk.setMin( min );
        chunk.setMax( max );
        string errMsg;

        CollectionMetadataPtr cloned( it->second->cloneSplit( chunk, splitKeys, version, &errMsg ) );
        // uassert to match old behavior, TODO: report errors w/o throwing
        uassert( 16857, errMsg, NULL != cloned.get() );

        _collMetadata[ns] = cloned;
    }

    // Installs metadata reflecting the chunks spanning [minKey,maxKey) having been merged.
    void ShardingState::mergeChunks( const string& ns,
                                     const BSONObj& minKey,
                                     const BSONObj& maxKey,
                                     ChunkVersion mergedVersion ) {

        scoped_lock lk( _mutex );

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        verify( it != _collMetadata.end() );

        string errMsg;

        CollectionMetadataPtr cloned( it->second->cloneMerge( minKey,
                                                              maxKey,
                                                              mergedVersion,
                                                              &errMsg ) );
        // uassert to match old behavior, TODO: report errors w/o throwing
        uassert( 17004, errMsg, NULL != cloned.get() );

        _collMetadata[ns] = cloned;
    }

    void ShardingState::resetMetadata( const string& ns ) {
        scoped_lock lk( _mutex );

        warning() << "resetting metadata for " << ns << ", this should only be used in testing"
                  << endl;

        _collMetadata.erase( ns );
    }

    Status ShardingState::refreshMetadataIfNeeded( const string& ns,
                                                   const ChunkVersion& reqShardVersion,
                                                   ChunkVersion* latestShardVersion )
    {
        // The _configServerTickets serializes this process such that only a small number of threads
        // can try to refresh at the same time.

        LOG( 2 ) << "metadata refresh requested for " << ns << " at shard version "
                 << reqShardVersion << endl;

        //
        // Queuing of refresh requests starts here when remote reload is needed. This may take time.
        // TODO: Explicitly expose the queuing discipline.
        //

        _configServerTickets.waitForTicket();
        TicketHolderReleaser needTicketFrom( &_configServerTickets );

        //
        // Fast path - check if the requested version is at a higher version than the current
        // metadata version or a different epoch before verifying against config server.
        //

        CollectionMetadataPtr storedMetadata;
        {
            scoped_lock lk( _mutex );
            CollectionMetadataMap::iterator it = _collMetadata.find( ns );
            if ( it != _collMetadata.end() ) storedMetadata = it->second;
        }
        ChunkVersion storedShardVersion;
        if ( storedMetadata ) storedShardVersion = storedMetadata->getShardVersion();
        *latestShardVersion = storedShardVersion;

        if ( storedShardVersion >= reqShardVersion &&
             storedShardVersion.epoch() == reqShardVersion.epoch() ) {

            // Don't need to remotely reload if we're in the same epoch with a >= version
            return Status::OK();
        }

        //
        // Slow path - remotely reload
        //
        // Cases:
        // A) Initial config load and/or secondary take-over.
        // B) Migration TO this shard finished, notified by mongos.
        // C) Dropping a collection, notified (currently) by mongos.
        // D) Stale client wants to reload metadata with a different *epoch*, so we aren't sure.

        if ( storedShardVersion.epoch() != reqShardVersion.epoch() ) {
            // Need to remotely reload if our epochs aren't the same, to verify
            LOG( 1 ) << "metadata change requested for " << ns << ", from shard version "
                     << storedShardVersion << " to " << reqShardVersion
                     << ", need to verify with config server" << endl;
        }
        else {
            // Epochs match here, but the requested version is newer than the stored one, so reload
            LOG( 1 ) << "metadata version update requested for " << ns
                     << ", from shard version " << storedShardVersion << " to " << reqShardVersion
                     << ", need to verify with config server" << endl;
        }

        return doRefreshMetadata( ns, reqShardVersion, true, latestShardVersion );
    }

    Status ShardingState::refreshMetadataNow( const string& ns, ChunkVersion* latestShardVersion )
    {
        return doRefreshMetadata( ns, ChunkVersion( 0, 0, OID() ), false, latestShardVersion );
    }

    Status ShardingState::doRefreshMetadata( const string& ns,
                                             const ChunkVersion& reqShardVersion,
                                             bool useRequestedVersion,
                                             ChunkVersion* latestShardVersion )
    {
        // The idea here is that we're going to reload the metadata from the config server, but
        // we need to do so outside any locks.  When we get our result back, if the current metadata
        // has changed, we may not be able to install the new metadata.

        //
        // Get the initial metadata
        // No DBLock is needed here: the metadata may legitimately change while the reload
        // is in flight, and it is re-checked under the lock below before anything is installed.
        //

        CollectionMetadataPtr beforeMetadata;
        string shardName;
        {
            scoped_lock lk( _mutex );
            CollectionMetadataMap::iterator it = _collMetadata.find( ns );
            if ( it != _collMetadata.end() ) beforeMetadata = it->second;
            shardName = _shardName;
        }

        ChunkVersion beforeShardVersion;
        ChunkVersion beforeCollVersion;
        if ( beforeMetadata ) {
            beforeShardVersion = beforeMetadata->getShardVersion();
            beforeCollVersion = beforeMetadata->getCollVersion();
        }

        *latestShardVersion = beforeShardVersion;

        // We can't reload without a shard name.  Must check here before loading, since shard name
        // may have changed if we checked it earlier and released the _mutex.
        if ( shardName.empty() ) {

            string errMsg = str::stream() << "cannot refresh metadata for " << ns
                                          << " before shard name has been set";

            LOG( 0 ) << errMsg << endl;
            return Status( ErrorCodes::IllegalOperation, errMsg );
        }

        //
        // Determine whether we need to diff or fully reload
        //

        bool fullReload = false;
        if ( !beforeMetadata ) {
            // We don't have any metadata to reload from
            fullReload = true;
        }
        else if ( useRequestedVersion && reqShardVersion.epoch() != beforeShardVersion.epoch() ) {
            // It's not useful to use the metadata as a base because we think the epoch will differ
            fullReload = true;
        }

        //
        // Load the metadata from the remote server, start construction
        //

        LOG( 0 ) << "remotely refreshing metadata for " << ns
                 << ( useRequestedVersion ?
                      string( " with requested shard version " ) + reqShardVersion.toString() : "" )
                 << ( fullReload ?
                      ", current shard version is " : " based on current shard version " )
                 << beforeShardVersion
                 << ", current metadata version is " << beforeCollVersion << endl;

        string errMsg;
        ConnectionString configServerLoc = ConnectionString::parse( _configServer, errMsg );
        MetadataLoader mdLoader( configServerLoc );
        CollectionMetadata* remoteMetadataRaw = new CollectionMetadata();
        CollectionMetadataPtr remoteMetadata( remoteMetadataRaw );

        Timer refreshTimer;
        Status status =
                mdLoader.makeCollectionMetadata( ns,
                                                 shardName,
                                                 ( fullReload ? NULL : beforeMetadata.get() ),
                                                 remoteMetadataRaw );
        long long refreshMillis = refreshTimer.millis();

        if ( status.code() == ErrorCodes::NamespaceNotFound ) {
            remoteMetadata.reset();
            remoteMetadataRaw = NULL;
        }
        else if ( !status.isOK() ) {

            warning() << "could not remotely refresh metadata for " << ns
                      << causedBy( status.reason() ) << endl;

            return status;
        }

        ChunkVersion remoteShardVersion;
        ChunkVersion remoteCollVersion;
        if ( remoteMetadata ) {
            remoteShardVersion = remoteMetadata->getShardVersion();
            remoteCollVersion = remoteMetadata->getCollVersion();
        }

        //
        // Get ready to install loaded metadata if needed
        //

        CollectionMetadataPtr afterMetadata;
        ChunkVersion afterShardVersion;
        ChunkVersion afterCollVersion;
        ChunkVersion::VersionChoice choice;

        // If we choose to install the new metadata, this describes the kind of install
        enum InstallType {
            InstallType_New, InstallType_Update, InstallType_Replace, InstallType_Drop,
            InstallType_None
        } installType = InstallType_None; // compiler complains otherwise

        {
            // DBLock needed since we're now potentially changing the metadata, and don't want
            // reads/writes to be ongoing.
            Lock::DBWrite writeLk( ns );

            //
            // Get the metadata now that the load has completed
            //

            scoped_lock lk( _mutex );
            CollectionMetadataMap::iterator it = _collMetadata.find( ns );
            if ( it != _collMetadata.end() ) afterMetadata = it->second;

            if ( afterMetadata ) {
                afterShardVersion = afterMetadata->getShardVersion();
                afterCollVersion = afterMetadata->getCollVersion();
            }

            *latestShardVersion = afterShardVersion;

            //
            // Resolve newer pending chunks with the remote metadata, finish construction
            //

            status = mdLoader.promotePendingChunks( afterMetadata.get(), remoteMetadataRaw );

            if ( !status.isOK() ) {

                warning() << "remote metadata for " << ns
                          << " is inconsistent with current pending chunks"
                          << causedBy( status.reason() ) << endl;

                return status;
            }

            //
            // Compare the 'before', 'after', and 'remote' versions/epochs and choose newest
            // Zero epochs (the sentinel value for "dropped" collections) are tested by
            // !epoch.isSet().
            //

            choice = ChunkVersion::chooseNewestVersion( beforeCollVersion,
                                                        afterCollVersion,
                                                        remoteCollVersion );

            if ( choice == ChunkVersion::VersionChoice_Remote ) {
                dassert(!remoteCollVersion.epoch().isSet() ||
                        remoteShardVersion >= beforeShardVersion);

                if ( !afterCollVersion.epoch().isSet() ) {

                    // First metadata load
                    installType = InstallType_New;
                    dassert( it == _collMetadata.end() );
                    _collMetadata.insert( make_pair( ns, remoteMetadata ) );
                }
                else if ( remoteCollVersion.epoch().isSet() &&
                          remoteCollVersion.epoch() == afterCollVersion.epoch() ) {

                    // Update to existing metadata
                    installType = InstallType_Update;

                    // Invariant: if the CollectionMetadata was not found, the version should have been 0.
                    dassert( it != _collMetadata.end() );
                    it->second = remoteMetadata;
                }
                else if ( remoteCollVersion.epoch().isSet() ) {

                    // New epoch detected, replacing metadata
                    installType = InstallType_Replace;

                    // Invariant: if the CollectionMetadata was not found, the version should have been 0.
                    dassert( it != _collMetadata.end() );
                    it->second = remoteMetadata;
                }
                else {
                    dassert( !remoteCollVersion.epoch().isSet() );

                    // Drop detected
                    installType = InstallType_Drop;
                    _collMetadata.erase( it );
                }

                *latestShardVersion = remoteShardVersion;
            }
        }
        // End _mutex
        // End DBWrite

        //
        // Do messaging based on what happened above
        //

        string versionMsg = str::stream()
            << " (loaded metadata version : " << remoteCollVersion.toString()
            << ( beforeCollVersion.epoch() == afterCollVersion.epoch() ?
                     string( ", stored version : " ) + afterCollVersion.toString() :
                     string( ", stored versions : " ) +
                         beforeCollVersion.toString() + " / " + afterCollVersion.toString() )
            << ", took " << refreshMillis << "ms)";

        if ( choice == ChunkVersion::VersionChoice_Unknown ) {

            string errMsg =
                str::stream() << "need to retry loading metadata for " << ns
                              << ", collection may have been dropped or recreated during load"
                              << versionMsg;

            warning() << errMsg << endl;
            return Status( ErrorCodes::RemoteChangeDetected, errMsg );
        }

        if ( choice == ChunkVersion::VersionChoice_Local ) {

            LOG( 0 ) << "newer metadata not found for " << ns << versionMsg << endl;
            return Status::OK();
        }

        dassert( choice == ChunkVersion::VersionChoice_Remote );

        switch( installType ) {
        case InstallType_New:
            LOG( 0 ) << "loaded new metadata for " << ns << versionMsg << endl;
            break;
        case InstallType_Update:
            LOG( 0 ) << "loaded newer metadata for " << ns << versionMsg << endl;
            break;
        case InstallType_Replace:
            LOG( 0 ) << "replacing metadata for " << ns << versionMsg << endl;
            break;
        case InstallType_Drop:
            LOG( 0 ) << "dropping metadata for " << ns << versionMsg << endl;
            break;
        default:
            verify( false );
            break;
        }

        return Status::OK();
    }

    void ShardingState::appendInfo( BSONObjBuilder& b ) {
        b.appendBool( "enabled" , _enabled );
        if ( ! _enabled )
            return;

        b.append( "configServer" , _configServer );
        b.append( "shardName" , _shardName );

        {
            BSONObjBuilder bb( b.subobjStart( "versions" ) );

            scoped_lock lk(_mutex);

            for ( CollectionMetadataMap::iterator it = _collMetadata.begin(); it != _collMetadata.end(); ++it ) {
                CollectionMetadataPtr p = it->second;
                bb.appendTimestamp( it->first , p->getShardVersion().toLong() );
            }
            bb.done();
        }

    }

    bool ShardingState::needCollectionMetadata( const string& ns ) const {
        if ( ! _enabled )
            return false;

        if ( ! ShardedConnectionInfo::get( false ) )
            return false;

        return true;
    }

    CollectionMetadataPtr ShardingState::getCollectionMetadata( const string& ns ) {
        scoped_lock lk( _mutex );

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        if ( it == _collMetadata.end() ) {
            return CollectionMetadataPtr();
        }
        else {
            return it->second;
        }
    }

    ShardingState shardingState;

    // -----ShardingState END ----

    // -----ShardedConnectionInfo START ----

    boost::thread_specific_ptr<ShardedConnectionInfo> ShardedConnectionInfo::_tl;

    ShardedConnectionInfo::ShardedConnectionInfo() {
        _forceVersionOk = false;
        _id.clear();
    }

    ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ) {
        ShardedConnectionInfo* info = _tl.get();
        if ( ! info && create ) {
            LOG(1) << "entering shard mode for connection" << endl;
            info = new ShardedConnectionInfo();
            _tl.reset( info );
        }
        return info;
    }

    void ShardedConnectionInfo::reset() {
        _tl.reset();
    }

    const ChunkVersion ShardedConnectionInfo::getVersion( const string& ns ) const {
        NSVersionMap::const_iterator it = _versions.find( ns );
        if ( it != _versions.end() ) {
            return it->second;
        }
        else {
            return ChunkVersion( 0, OID() );
        }
    }

    void ShardedConnectionInfo::setVersion( const string& ns , const ChunkVersion& version ) {
        _versions[ns] = version;
    }

    void ShardedConnectionInfo::addHook() {
        static mongo::mutex lock("ShardedConnectionInfo::addHook mutex");
        static bool done = false;

        scoped_lock lk(lock);
        if (!done) {
            log() << "first cluster operation detected, adding sharding hook to enable versioning "
                    "and authentication to remote servers" << endl;
            pool.addHook(new ShardingConnectionHook(false));
            shardConnectionPool.addHook(new ShardingConnectionHook(true));
            done = true;
        }
    }

    void ShardedConnectionInfo::setID( const OID& id ) {
        _id = id;
    }

    class MongodShardCommand : public Command {
    public:
        MongodShardCommand( const char * n ) : Command( n ) {
        }
        virtual bool slaveOk() const {
            return false;
        }
        virtual bool adminOnly() const {
            return true;
        }
    };


    bool haveLocalShardingInfo( const string& ns ) {
        if ( ! shardingState.enabled() )
            return false;

        if ( ! shardingState.hasVersion( ns ) )
            return false;

        return ShardedConnectionInfo::get( false ) != NULL;
    }

    class UnsetShardingCommand : public MongodShardCommand {
    public:
        UnsetShardingCommand() : MongodShardCommand("unsetSharding") {}

        virtual void help( stringstream& help ) const {
            help << "internal";
        }

        virtual LockType locktype() const { return NONE; }

        virtual bool slaveOk() const { return true; }

        virtual void addRequiredPrivileges(const std::string& dbname,
                                           const BSONObj& cmdObj,
                                           std::vector<Privilege>* out) {
            ActionSet actions;
            actions.addAction(ActionType::internal);
            out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
        }

        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            ShardedConnectionInfo::reset();
            return true;
        }

    } unsetShardingCommand;

    class SetShardVersion : public MongodShardCommand {
    public:
        SetShardVersion() : MongodShardCommand("setShardVersion") {}

        virtual void help( stringstream& help ) const {
            help << "internal";
        }

        virtual bool slaveOk() const { return true; }
        virtual LockType locktype() const { return NONE; }
        
        virtual void addRequiredPrivileges(const std::string& dbname,
                                           const BSONObj& cmdObj,
                                           std::vector<Privilege>* out) {
            ActionSet actions;
            actions.addAction(ActionType::internal);
            out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
        }

        bool checkConfigOrInit( const string& configdb , bool authoritative , string& errmsg , BSONObjBuilder& result , bool locked=false ) const {
            if ( configdb.size() == 0 ) {
                errmsg = "no configdb";
                return false;
            }
            
            if ( shardingState.enabled() ) {
                if ( configdb == shardingState.getConfigServer() ) 
                    return true;
                
                result.append( "configdb" , BSON( "stored" << shardingState.getConfigServer() << 
                                                  "given" << configdb ) );
                                                  
                errmsg = str::stream() << "mongos specified a different config database string : "
                                       << "stored : " << shardingState.getConfigServer()
                                       << " vs given : " << configdb;
                return false;
            }
            
            if ( ! authoritative ) {
                result.appendBool( "need_authoritative" , true );
                errmsg = "first setShardVersion";
                return false;
            }
            
            if ( locked ) {
                ShardingState::initialize(configdb);
                return true;
            }

            Lock::GlobalWrite lk;
            return checkConfigOrInit( configdb , authoritative , errmsg , result , true );
        }
        
        // Remembers the mongos serverID on first contact and rejects a connection whose serverID changes.
        bool checkMongosID( ShardedConnectionInfo* info, const BSONElement& id, string& errmsg ) {
            if ( id.type() != jstOID ) {
                if ( ! info->hasID() ) {
                    warning() << "bad serverID set in setShardVersion and none in info: " << id << endl;
                }
                // TODO: fix this
                //errmsg = "need serverID to be an OID";
                //return 0;
                return true;
            }
            
            OID clientId = id.__oid();
            if ( ! info->hasID() ) {
                info->setID( clientId );
                return true;
            }
            
            if ( clientId != info->getID() ) {
                errmsg = "server id has changed!";
                return false;
            }

            return true;
        }

        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) ) 
                return false;

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
            }
            
            // Handle initial shard connection
            if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){

                result.append( "initialized", true );

                // Send back wire version to let mongos know what protocol we can speak
                result.append( "minWireVersion", minWireVersion );
                result.append( "maxWireVersion", maxWireVersion );

                return true;
            }

            // we can run on a slave up to here
            if ( ! isMaster( "admin" ) ) {
                result.append( "errmsg" , "not master" );
                result.append( "note" , "from post init in setShardVersion" );
                return false;
            }

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify namespace";
                return false;
            }

            if( ! ChunkVersion::canParseBSON( cmdObj, "version" ) ){
                errmsg = "need to specify version";
                return false;
            }

            const ChunkVersion version = ChunkVersion::fromBSON( cmdObj, "version" );
            
            // step 3

            const ChunkVersion oldVersion = info->getVersion(ns);
            const ChunkVersion globalVersion = shardingState.getVersion(ns);

            oldVersion.addToBSON( result, "oldVersion" );
            
            if ( globalVersion.isSet() && version.isSet() ) {
                // this means there is no reset in progress on either side
                // so it's safe to make some assumptions

                if ( version.isWriteCompatibleWith( globalVersion ) ) {
                    // mongos and mongod agree!
                    if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                        if ( oldVersion < globalVersion &&
                             oldVersion.hasCompatibleEpoch(globalVersion) )
                        {
                            info->setVersion( ns , version );
                        }
                        else if ( authoritative ) {
                            // this means there was a drop and our version is reset
                            info->setVersion( ns , version );
                        }
                        else {
                            result.append( "ns" , ns );
                            result.appendBool( "need_authoritative" , true );
                            errmsg = "verifying drop on '" + ns + "'";
                            return false;
                        }
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
                // the global version has been reset, so reset this connection's version too
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
            }

            if ( ! version.isSet() && ! globalVersion.isSet() ) {
                // this connection is cleaning itself
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            // Cases below all either return OR fall through to the remote metadata reload.
            if ( version.isSet() || !globalVersion.isSet() ) {

                // Not Dropping

                // TODO: Refactor all of this
                if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
                    errmsg = "this connection already had a newer version of collection '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "newVersion" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    return false;
                }

                // TODO: Refactor all of this
                if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }
                    errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "version" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    result.appendBool( "reloadConfig" , true );
                    return false;
                }

                if ( ! globalVersion.isSet() && ! authoritative ) {
                    // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
                    // should require a reload.
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }

                    // need authoritative for first look
                    result.append( "ns" , ns );
                    result.appendBool( "need_authoritative" , true );
                    errmsg = "first time for collection '" + ns + "'";
                    return false;
                }

                // Fall through to metadata reload below
            }
            else {

                // Dropping

                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    globalVersion.addToBSON( result, "globalVersion" );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }

                // Fall through to metadata reload below
            }

            ChunkVersion currVersion;
            Status status = shardingState.refreshMetadataIfNeeded( ns, version, &currVersion );

            if (!status.isOK()) {

                // The reload itself was interrupted or confused here

                errmsg = str::stream() << "could not refresh metadata for " << ns
                                       << " with requested shard version " << version.toString()
                                       << ", stored shard version is " << currVersion.toString()
                                       << causedBy( status.reason() );

                warning() << errmsg << endl;

                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                currVersion.addToBSON( result, "globalVersion" );
                result.appendBool( "reloadConfig", true );

                return false;
            }
            else if ( !version.isWriteCompatibleWith( currVersion ) ) {

                // We reloaded a version that doesn't match the version mongos was trying to
                // set.

                errmsg = str::stream() << "requested shard version differs from"
                                       << " config shard version for " << ns
                                       << ", requested version is " << version.toString()
                                       << " but found version " << currVersion.toString();

                OCCASIONALLY warning() << errmsg << endl;

                // WARNING: the exact fields below are important for compatibility with mongos
                // version reload.

                result.append( "ns" , ns );
                currVersion.addToBSON( result, "globalVersion" );

                // If this was a reset of a collection or the last chunk moved out, inform mongos to
                // do a full reload.
                if (currVersion.epoch() != version.epoch() || !currVersion.isSet() ) {
                    result.appendBool( "reloadConfig", true );
                    // Zero-version also needed to trigger full mongos reload, sadly
                    // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
                    ChunkVersion( 0, 0, OID() ).addToBSON( result, "version" );
                    // For debugging
                    version.addToBSON( result, "origVersion" );
                }
                else {
                    version.addToBSON( result, "version" );
                }

                return false;
            }

            info->setVersion( ns , version );
            return true;
        }

    } setShardVersionCmd;

    class GetShardVersion : public MongodShardCommand {
    public:
        GetShardVersion() : MongodShardCommand("getShardVersion") {}

        virtual void help( stringstream& help ) const {
            help << " example: { getShardVersion : 'alleyinsider.foo'  } ";
        }

        virtual LockType locktype() const { return NONE; }

        virtual Status checkAuthForCommand(ClientBasic* client,
                                           const std::string& dbname,
                                           const BSONObj& cmdObj) {
            if (!client->getAuthorizationSession()->isAuthorizedForActionsOnResource(
                    ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))),
                    ActionType::getShardVersion)) {
                return Status(ErrorCodes::Unauthorized, "Unauthorized");
            }
            return Status::OK();
        }
        virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const {
            return parseNsFullyQualified(dbname, cmdObj);
        }

        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string ns = cmdObj["getShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify full namespace";
                return false;
            }

            result.append( "configServer" , shardingState.getConfigServer() );

            result.appendTimestamp( "global" , shardingState.getVersion(ns).toLong() );

            ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );
            result.appendBool( "inShardedMode" , info != 0 );
            if ( info )
                result.appendTimestamp( "mine" , info->getVersion(ns).toLong() );
            else
                result.appendTimestamp( "mine" , 0 );

            if ( cmdObj["fullMetadata"].trueValue() ) {
                CollectionMetadataPtr metadata = shardingState.getCollectionMetadata( ns );
                if ( metadata ) result.append( "metadata", metadata->toBSON() );
                else result.append( "metadata", BSONObj() );
            }

            return true;
        }

    } getShardVersion;

    class ShardingStateCmd : public MongodShardCommand {
    public:
        ShardingStateCmd() : MongodShardCommand( "shardingState" ) {}

        virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock

        virtual void addRequiredPrivileges(const std::string& dbname,
                                           const BSONObj& cmdObj,
                                           std::vector<Privilege>* out) {
            ActionSet actions;
            actions.addAction(ActionType::shardingState);
            out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
        }

        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            shardingState.appendInfo( result );
            return true;
        }

    } shardingStateCmd;

    /**
     * @return true if not in sharded mode
     *         or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) {

        if ( ! shardingState.enabled() )
            return true;

        if ( ! isMasterNs( ns.c_str() ) )  {
            // right now connections to secondaries aren't versioned at all
            return true;
        }

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ) {
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which I think is the correct behavior
            return true;
        }

        if ( info->inForceVersionOkMode() ) {
            return true;
        }

        // TODO : all collections at some point, be sharded or not, will have a version
        //  (and a CollectionMetadata)
        received = info->getVersion( ns );
        wanted = shardingState.getVersion( ns );

        if( received.isWriteCompatibleWith( wanted ) ) return true;

        //
        // Figure out exactly why not compatible, send appropriate error message
        // The versions themselves are returned in the error, so not needed in messages here
        //

        // Check epoch first, to send more meaningful message, since other parameters probably
        // won't match either
        if( ! wanted.hasCompatibleEpoch( received ) ){
            errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", "
                                   << "the collection may have been dropped and recreated";
            return false;
        }

        if( ! wanted.isSet() && received.isSet() ){
            errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", "
                                   << "the collection may have been dropped";
            return false;
        }

        if( wanted.isSet() && ! received.isSet() ){
            errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", "
                                   << "but no version set in request";
            return false;
        }

        if( wanted.majorVersion() != received.majorVersion() ){

            //
            // Could be > or < - wanted is > if this is the source of a migration,
            // wanted < if this is the target of a migration
            //

            errmsg = str::stream() << "version mismatch detected for " << ns << ", "
                                   << "stored major version " << wanted.majorVersion()
                                   << " does not match received " << received.majorVersion();
            return false;
        }

        // Those are all the reasons the versions can mismatch
        verify( false );

        return false;

    }

    void usingAShardConnection( const string& addr ) {
    }

}
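
The version checks throughout the example above lean on a handful of ChunkVersion predicates (isSet, hasCompatibleEpoch, isWriteCompatibleWith, operator<). The sketch below is a toy model of those semantics inferred only from how they are used here; the real ChunkVersion packs major/minor into a single 64-bit value and carries an OID epoch, so this is illustration, not the actual implementation.

    // Hedged toy model of the ChunkVersion comparisons used above; illustration only.
    struct ToyVersion {
        unsigned major;     // bumped when chunks move between shards (migrations)
        unsigned minor;     // bumped on splits/merges within a shard
        long long epoch;    // stand-in for the OID identifying a collection incarnation

        bool isSet() const { return major != 0 || minor != 0; }
        bool hasCompatibleEpoch( const ToyVersion& o ) const { return epoch == o.epoch; }

        // Writes only care about the major version within the same epoch.
        bool isWriteCompatibleWith( const ToyVersion& o ) const {
            return hasCompatibleEpoch( o ) && major == o.major;
        }

        bool operator<( const ToyVersion& o ) const {
            return major < o.major || ( major == o.major && minor < o.minor );
        }
    };
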
Example #21
0
    /**
     * @return true if not in sharded mode
     *         or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) {

        if ( ! shardingState.enabled() )
            return true;

        if ( ! isMasterNs( ns.c_str() ) )  {
            // right now connections to secondaries aren't versioned at all
            return true;
        }

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ) {
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which I think is the correct behavior
            return true;
        }

        if ( info->inForceVersionOkMode() ) {
            return true;
        }

        // TODO : all collections at some point, be sharded or not, will have a version
        //  (and a CollectionMetadata)
        received = info->getVersion( ns );
        wanted = shardingState.getVersion( ns );

        if( received.isWriteCompatibleWith( wanted ) ) return true;

        //
        // Figure out exactly why not compatible, send appropriate error message
        // The versions themselves are returned in the error, so not needed in messages here
        //

        // Check epoch first, to send more meaningful message, since other parameters probably
        // won't match either
        if( ! wanted.hasCompatibleEpoch( received ) ){
            errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", "
                                   << "the collection may have been dropped and recreated";
            return false;
        }

        if( ! wanted.isSet() && received.isSet() ){
            errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", "
                                   << "the collection may have been dropped";
            return false;
        }

        if( wanted.isSet() && ! received.isSet() ){
            errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", "
                                   << "but no version set in request";
            return false;
        }

        if( wanted.majorVersion() != received.majorVersion() ){

            //
            // Could be > or < - wanted is > if this is the source of a migration,
            // wanted < if this is the target of a migration
            //

            errmsg = str::stream() << "version mismatch detected for " << ns << ", "
                                   << "stored major version " << wanted.majorVersion()
                                   << " does not match received " << received.majorVersion();
            return false;
        }

        // Those are all the reasons the versions can mismatch
        verify( false );

        return false;

    }
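
When shardVersionOk() returns false, the received and wanted versions it filled in are what a caller would send back to mongos so the router can decide whether to refresh its routing table. A minimal sketch of that plumbing follows, assuming a hypothetical buildStaleConfigReply() helper and illustrative field names; the real code paths propagate a stale-config error instead.

    // Hedged sketch: packaging shardVersionOk()'s outputs into a reply the router can act on.
    // buildStaleConfigReply() and the field names are illustrative, not the actual wire format.
    BSONObj buildStaleConfigReply( const string& ns, const string& errmsg,
                                   const ChunkVersion& received, const ChunkVersion& wanted ) {
        BSONObjBuilder b;
        b.append( "ns", ns );
        b.append( "errmsg", errmsg );
        received.addToBSON( b, "vReceived" );   // version the client sent with the request
        wanted.addToBSON( b, "vWanted" );       // version this shard currently has
        return b.obj();
    }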