Beispiel #1
0
    int ConfigDiffTracker<ValType,ShardType>::
        calculateConfigDiff( DBClientCursorInterface& diffCursor )
    {
        verifyAttached();

        // Apply the chunk changes to the ranges and versions

        //
        // Overall idea here is to work in two steps :
        // 1. For all the new chunks we find, increment the maximum version per-shard and
        //    per-collection, and remove any conflicting chunks from the ranges
        // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on the
        //    shard for mongod) add them to the ranges
        //

        vector<BSONObj> newTracked;
        // Store epoch now so it doesn't change when we change max
        OID currEpoch = _maxVersion->epoch();

        _validDiffs = 0;
        while( diffCursor.more() ){

            BSONObj diffChunkDoc = diffCursor.next();

            ShardChunkVersion chunkVersion = ShardChunkVersion::fromBSON( diffChunkDoc, "lastmod" );

            if( diffChunkDoc[ "min" ].type() != Object || diffChunkDoc[ "max" ].type() != Object ||
                diffChunkDoc[ "shard" ].type() != String )
            {
                warning() << "got invalid chunk document " << diffChunkDoc
                          << " when trying to load differing chunks" << endl;
                continue;
            }

            if( ! chunkVersion.isSet() || ! chunkVersion.hasCompatibleEpoch( currEpoch ) ){

                warning() << "got invalid chunk version " << chunkVersion << " in document " << diffChunkDoc
                          << " when trying to load differing chunks at version "
                          << ShardChunkVersion( _maxVersion->toLong(), currEpoch ) << endl;

                // Don't keep loading, since we know we'll be broken here
                return -1;
            }

            _validDiffs++;

            // Get max changed version and chunk version
            if( chunkVersion > *_maxVersion ) *_maxVersion = chunkVersion;

            // Chunk version changes
            ShardType shard = shardFor( diffChunkDoc[ "shard" ].String() );
            typename map<ShardType, ShardChunkVersion>::iterator shardVersionIt = _maxShardVersions->find( shard );
            if( shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion ){
                (*_maxShardVersions)[ shard ] = chunkVersion;
            }

            // See if we need to remove any chunks we are currently tracking b/c of this chunk's changes
            removeOverlapping( diffChunkDoc[ "min" ].Obj(), diffChunkDoc[ "max" ].Obj() );

            // Figure out which of the new chunks we need to track
            // Important - we need to actually own this doc, in case the cursor decides to getMore or unbuffer
            if( isTracked( diffChunkDoc ) ) newTracked.push_back( diffChunkDoc.getOwned() );
        }

        LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns
               << " (tracking " << newTracked.size() << "), new version is " << _maxVersion << endl;

        for( vector<BSONObj>::iterator it = newTracked.begin(); it != newTracked.end(); it++ ){

            BSONObj chunkDoc = *it;

            // Important - we need to make sure we actually own the min and max here
            BSONObj min = chunkDoc[ "min" ].Obj().getOwned();
            BSONObj max = chunkDoc[ "max" ].Obj().getOwned();

            // Invariant enforced by sharding
            // It's possible to read inconsistent state b/c of getMore() and yielding, so we want
            // to detect as early as possible.
            // TODO: This checks for overlap, we also should check for holes here iff we're tracking
            // all chunks
            if( isOverlapping( min, max ) ) return -1;

            _currMap->insert( rangeFor( chunkDoc, min, max ) );
        }

        return _validDiffs;
    }
Beispiel #2
0
    bool MetadataLoader::initChunks(const CollectionType& collDoc,
                                    const string& ns,
                                    const string& shard,
                                    const CollectionManager* oldManager,
                                    CollectionManager* manager,
                                    string* errMsg) {

        map<string,ShardChunkVersion> versionMap;
        manager->_maxCollVersion = ShardChunkVersion(0, 0, collDoc.getEpoch());

        // Check to see if we should use the old version or not.
        if (oldManager) {

            ShardChunkVersion oldVersion = oldManager->getMaxShardVersion();

            if (oldVersion.isSet() && oldVersion.hasCompatibleEpoch(collDoc.getEpoch())) {

                // Our epoch for coll version and shard version should be the same.
                verify(oldManager->getMaxCollVersion().hasCompatibleEpoch(collDoc.getEpoch()));

                versionMap[shard] = oldManager->_maxShardVersion;
                manager->_maxCollVersion = oldManager->_maxCollVersion;

                // TODO: This could be made more efficient if copying not required, but
                // not as frequently reloaded as in mongos.
                manager->_chunksMap = oldManager->_chunksMap;

                LOG(2) << "loading new chunks for collection " << ns
                       << " using old chunk manager w/ version "
                       << oldManager->getMaxShardVersion()
                       << " and " << manager->_chunksMap.size() << " chunks" << endl;
            }
        }

        // Exposes the new 'manager's range map and version to the "differ," who
        // would ultimately be responsible of filling them up.
        SCMConfigDiffTracker differ(shard);
        differ.attach(ns, manager->_chunksMap, manager->_maxCollVersion, versionMap);

        try {

            scoped_ptr<ScopedDbConnection> connPtr(
                ScopedDbConnection::getInternalScopedDbConnection(_configLoc.toString(), 30));
            ScopedDbConnection& conn = *connPtr;

            auto_ptr<DBClientCursor> cursor = conn->query(ChunkType::ConfigNS,
                                                          differ.configDiffQuery());

            if (!cursor.get()) {
                // 'errMsg' was filled by the getChunkCursor() call.
                manager->_maxCollVersion = ShardChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }

            // Diff tracker should *always* find at least one chunk if collection exists.
            int diffsApplied = differ.calculateConfigDiff(*cursor);
            if (diffsApplied > 0) {

                LOG(2) << "loaded " << diffsApplied
                       << " chunks into new chunk manager for " << ns
                       << " with version " << manager->_maxCollVersion << endl;

                manager->_maxShardVersion = versionMap[shard];
                manager->fillRanges();
                connPtr->done();
                return true;
            }
            else if(diffsApplied == 0) {

                *errMsg = str::stream() << "no chunks found when reloading " << ns
                                        << ", previous version was "
                                        << manager->_maxCollVersion.toString();

                warning() << *errMsg << endl;

                manager->_maxCollVersion = ShardChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }
            else{

                // TODO: make this impossible by making sure we don't migrate / split on this
                // shard during the reload.  No chunks were found for the ns.

                *errMsg = str::stream() << "invalid chunks found when reloading " << ns
                                        << ", previous version was "
                                        << manager->_maxCollVersion.toString()
                                        << ", this should be rare";

                warning() << errMsg << endl;

                manager->_maxCollVersion = ShardChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }
        }
        catch (const DBException& e) {
            *errMsg = str::stream() << "caught exception accessing the config servers"
                                    << causedBy(e);

            // We deliberately do not return connPtr to the pool, since it was involved
            // with the error here.

            return false;
        }
    }
Beispiel #3
0
    void WriteBackListener::run() {

        int secsToSleep = 0;
        scoped_ptr<ShardChunkVersion> lastNeededVersion;
        int lastNeededCount = 0;

        while ( ! inShutdown() ) {

            if ( ! Shard::isAShardNode( _addr ) ) {
                LOG(1) << _addr << " is not a shard node" << endl;
                sleepsecs( 60 );
                continue;
            }

            try {
                scoped_ptr<ScopedDbConnection> conn(
                        ScopedDbConnection::getInternalScopedDbConnection( _addr ) );

                BSONObj result;

                {
                    BSONObjBuilder cmd;
                    cmd.appendOID( "writebacklisten" , &serverID ); // Command will block for data
                    if ( ! conn->get()->runCommand( "admin" , cmd.obj() , result ) ) {
                        result = result.getOwned();
                        log() <<  "writebacklisten command failed!  "  << result << endl;
                        conn->done();
                        continue;
                    }

                }
                conn->done();

                LOG(1) << "writebacklisten result: " << result << endl;

                BSONObj data = result.getObjectField( "data" );
                if ( data.getBoolField( "writeBack" ) ) {
                    string ns = data["ns"].valuestrsafe();

                    ConnectionIdent cid( "" , 0 );
                    OID wid;
                    if ( data["connectionId"].isNumber() && data["id"].type() == jstOID ) {
                        string s = "";
                        if ( data["instanceIdent"].type() == String )
                            s = data["instanceIdent"].String();
                        cid = ConnectionIdent( s , data["connectionId"].numberLong() );
                        wid = data["id"].OID();
                    }
                    else {
                        warning() << "mongos/mongod version mismatch (1.7.5 is the split)" << endl;
                    }

                    int len; // not used, but needed for next call
                    Message msg( (void*)data["msg"].binData( len ) , false );
                    massert( 10427 ,  "invalid writeback message" , msg.header()->valid() );

                    DBConfigPtr db = grid.getDBConfig( ns );
                    ShardChunkVersion needVersion = ShardChunkVersion::fromBSON( data, "version" );

                    //
                    // TODO: Refactor the sharded strategy to correctly handle all sharding state changes itself,
                    // we can't rely on WBL to do this for us b/c anything could reset our state in-between.
                    // We should always reload here for efficiency when possible, but staleness is also caught in the
                    // loop below.
                    //

                    ChunkManagerPtr manager;
                    ShardPtr primary;
                    db->getChunkManagerOrPrimary( ns, manager, primary );

                    ShardChunkVersion currVersion;
                    if( manager ) currVersion = manager->getVersion();

                    LOG(1) << "connectionId: " << cid << " writebackId: " << wid << " needVersion : " << needVersion.toString()
                           << " mine : " << currVersion.toString() << endl;

                    LOG(1) << msg.toString() << endl;

                    //
                    // We should reload only if we need to update our version to be compatible *and* we
                    // haven't already done so.  This avoids lots of reloading when we remove/add a sharded collection
                    //

                    bool alreadyReloaded = lastNeededVersion &&
                                           lastNeededVersion->isEquivalentTo( needVersion );

                    if( alreadyReloaded ){

                        LOG(1) << "wbl already reloaded config information for version "
                               << needVersion << ", at version " << currVersion << endl;
                    }
                    else if( lastNeededVersion ) {

                        log() << "new version change detected to " << needVersion.toString()
                              << ", " << lastNeededCount << " writebacks processed at "
                              << lastNeededVersion->toString() << endl;

                        lastNeededCount = 0;
                    }

                    //
                    // Set our lastNeededVersion for next time
                    //

                    lastNeededVersion.reset( new ShardChunkVersion( needVersion ) );
                    lastNeededCount++;

                    //
                    // Determine if we should reload, if so, reload
                    //

                    bool shouldReload = ! needVersion.isWriteCompatibleWith( currVersion ) &&
                                        ! alreadyReloaded;

                    if( shouldReload && currVersion.isSet()
                                     && needVersion.isSet()
                                     && currVersion.hasCompatibleEpoch( needVersion ) )
                    {

                        //
                        // If we disagree about versions only, reload the chunk manager
                        //

                        db->getChunkManagerIfExists( ns, true );
                    }
                    else if( shouldReload ){

                        //
                        // If we disagree about anything else, reload the full db
                        //

                        warning() << "reloading config data for " << db->getName() << ", "
                                  << "wanted version " << needVersion.toString()
                                  << " but currently have version " << currVersion.toString() << endl;

                        db->reload();
                    }

                    // do request and then call getLastError
                    // we have to call getLastError so we can return the right fields to the user if they decide to call getLastError

                    BSONObj gle;
                    int attempts = 0;
                    while ( true ) {
                        attempts++;

                        try {

                            Request r( msg , 0 );
                            r.init();

                            r.d().reservedField() |= Reserved_FromWriteback;

                            ClientInfo * ci = r.getClientInfo();
                            if (!noauth) {
                                // TODO: Figure out why this is 'admin' instead of 'local'.
                                ci->getAuthenticationInfo()->authorize("admin", internalSecurity.user);
                            }
                            ci->noAutoSplit();

                            r.process( attempts );

                            ci->newRequest(); // this so we flip prev and cur shards

                            BSONObjBuilder b;
                            string errmsg;
                            if ( ! ci->getLastError( "admin",
                                                     BSON( "getLastError" << 1 ),
                                                     b,
                                                     errmsg,
                                                     true ) )
                            {
                                b.appendBool( "commandFailed" , true );
                                if( ! b.hasField( "errmsg" ) ){

                                    b.append( "errmsg", errmsg );
                                    gle = b.obj();
                                }
                                else if( errmsg.size() > 0 ){

                                    // Rebuild GLE object with errmsg
                                    // TODO: Make this less clumsy by improving GLE interface
                                    gle = b.obj();

                                    if( gle["errmsg"].type() == String ){

                                        BSONObj gleNoErrmsg =
                                                gle.filterFieldsUndotted( BSON( "errmsg" << 1 ),
                                                                          false );
                                        BSONObjBuilder bb;
                                        bb.appendElements( gleNoErrmsg );
                                        bb.append( "errmsg", gle["errmsg"].String() +
                                                             " ::and:: " +
                                                             errmsg );
                                        gle = bb.obj().getOwned();
                                    }
                                }
                            }
                            else{
                                gle = b.obj();
                            }

                            log() << "GLE is " << gle << endl;

                            if ( gle["code"].numberInt() == 9517 ) {

                                log() << "new version change detected, "
                                      << lastNeededCount << " writebacks processed previously" << endl;

                                lastNeededVersion.reset();
                                lastNeededCount = 1;

                                log() << "writeback failed because of stale config, retrying attempts: " << attempts << endl;
                                LOG(1) << "writeback error : " << gle << endl;

                                //
                                // Bringing this in line with the similar retry logic elsewhere
                                //
                                // TODO: Reloading the chunk manager may not help if we dropped a
                                // collection, but we don't actually have that info in the writeback
                                // error
                                //

                                if( attempts <= 2 ){
                                    db->getChunkManagerIfExists( ns, true );
                                }
                                else{
                                    versionManager.forceRemoteCheckShardVersionCB( ns );
                                    sleepsecs( attempts - 1 );
                                }

                                uassert( 15884, str::stream()
                                         << "Could not reload chunk manager after "
                                         << attempts << " attempts.", attempts <= 4 );

                                continue;
                            }

                            ci->clearSinceLastGetError();
                        }
                        catch ( DBException& e ) {
                            error() << "error processing writeback: " << e << endl;
                            BSONObjBuilder b;
                            e.getInfo().append( b, "err", "code" );
                            gle = b.obj();
                        }

                        break;
                    }

                    {
                        scoped_lock lk( _seenWritebacksLock );
                        WBStatus& s = _seenWritebacks[cid];
                        s.id = wid;
                        s.gle = gle;
                    }
                }
                else if ( result["noop"].trueValue() ) {
                    // no-op
                }
                else {
                    log() << "unknown writeBack result: " << result << endl;
                }

                secsToSleep = 0;
                continue;
            }
            catch ( std::exception& e ) {

                if ( inShutdown() ) {
                    // we're shutting down, so just clean up
                    return;
                }

                log() << "WriteBackListener exception : " << e.what() << endl;

                // It's possible this shard was removed
                Shard::reloadShardInfo();
            }
            catch ( ... ) {
                log() << "WriteBackListener uncaught exception!" << endl;
            }
            secsToSleep++;
            sleepsecs(secsToSleep);
            if ( secsToSleep > 10 )
                secsToSleep = 0;
        }

        log() << "WriteBackListener exiting : address no longer in cluster " << _addr;

    }