Example #1
void Chunk::serialize(BSONObjBuilder& to, ChunkVersion myLastMod) {
    to.append("_id", genID(_manager->getns(), _min));

    if (myLastMod.isSet()) {
        myLastMod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else if (_lastmod.isSet()) {
        _lastmod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else {
        verify(0);
    }

    to << ChunkType::ns(_manager->getns());
    to << ChunkType::min(_min);
    to << ChunkType::max(_max);
    to << ChunkType::shard(_shardId);
}
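
The function above picks the version to serialize with a simple precedence rule: an explicitly supplied version wins, otherwise the chunk's own last-modified version is used, and anything else is a programming error. A minimal, self-contained sketch of that rule with plain C++ types (VersionStamp and pickVersionToSerialize are hypothetical names, not the MongoDB API):

// Hypothetical sketch (plain C++, not the BSON API) of the same selection rule:
// the explicitly supplied version wins, otherwise fall back to the chunk's own
// last-modified version, and abort if neither is set.
#include <cassert>

struct VersionStamp {
    unsigned long long combined = 0;   // 0 means "not set"
    bool isSet() const { return combined != 0; }
};

inline VersionStamp pickVersionToSerialize(const VersionStamp& explicitVersion,
                                           const VersionStamp& storedLastMod) {
    if (explicitVersion.isSet()) return explicitVersion;
    if (storedLastMod.isSet())   return storedLastMod;
    assert(false && "neither version is set");   // mirrors verify(0) above
    return VersionStamp{};
}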
Example #2
unique_ptr<CollectionMetadata> CollectionMetadata::clonePlusChunk(
    const BSONObj& minKey, const BSONObj& maxKey, const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion.isSet());
    invariant(minKey.woCompare(maxKey) < 0);
    invariant(!rangeMapOverlaps(_chunksMap, minKey, maxKey));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_chunksMap.insert(make_pair(minKey.getOwned(), maxKey.getOwned()));
    metadata->_shardVersion = newShardVersion;
    metadata->_collVersion = newShardVersion > _collVersion ? newShardVersion : _collVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return metadata;
}
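
clonePlusChunk copies the existing metadata, inserts one new chunk, and bumps the shard version while keeping the collection version monotonic. A hedged sketch of the same copy-then-insert pattern using deliberately simplified types (an int-keyed range map instead of BSON bounds; all names here are hypothetical):

// Minimal sketch of the clone-plus-chunk idea with hypothetical simplified types.
#include <cassert>
#include <map>
#include <memory>

struct SimpleVersion {
    int major = 0, minor = 0;
    bool operator>(const SimpleVersion& o) const {
        return major != o.major ? major > o.major : minor > o.minor;
    }
};

struct SimpleMetadata {
    std::map<int, int> chunks;       // min -> max, half-open ranges [min, max)
    SimpleVersion shardVersion;
    SimpleVersion collVersion;
};

// Returns true if [min, max) overlaps any existing range.
inline bool overlaps(const std::map<int, int>& chunks, int min, int max) {
    for (const auto& [lo, hi] : chunks)
        if (min < hi && lo < max) return true;
    return false;
}

// Clone the metadata and add one chunk, mirroring CollectionMetadata::clonePlusChunk.
inline std::unique_ptr<SimpleMetadata> clonePlusChunk(const SimpleMetadata& src,
                                                      int min, int max,
                                                      SimpleVersion newShardVersion) {
    assert(min < max);
    assert(!overlaps(src.chunks, min, max));

    auto out = std::make_unique<SimpleMetadata>(src);   // copy the existing chunk map
    out->chunks.emplace(min, max);
    out->shardVersion = newShardVersion;
    out->collVersion = newShardVersion > src.collVersion ? newShardVersion : src.collVersion;
    return out;
}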
Example #3
int ConfigDiffTracker<ValType, ShardType>::calculateConfigDiff(
    const std::vector<ChunkType>& chunks) {
    _assertAttached();

    // Apply the chunk changes to the ranges and versions
    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //      per-collection, and remove any conflicting chunks from the ranges.
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on
    //      the shard for mongod) add them to the ranges.

    std::vector<ChunkType> newTracked;

    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;

    for (const ChunkType& chunk : chunks) {
        ChunkVersion chunkVersion =
            ChunkVersion::fromBSON(chunk.toBSON(), ChunkType::DEPRECATED_lastmod());

        if (!chunkVersion.isSet() || !chunkVersion.hasEqualEpoch(currEpoch)) {
            warning() << "got invalid chunk version " << chunkVersion << " in document "
                      << chunk.toString() << " when trying to load differing chunks at version "
                      << ChunkVersion(
                          _maxVersion->majorVersion(), _maxVersion->minorVersion(), currEpoch);

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if (chunkVersion > *_maxVersion) {
            *_maxVersion = chunkVersion;
        }

        // Chunk version changes
        ShardType shard = shardFor(chunk.getShard());

        typename MaxChunkVersionMap::const_iterator shardVersionIt = _maxShardVersions->find(shard);
        if (shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion) {
            (*_maxShardVersions)[shard] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking because of this
        // chunk's changes
        removeOverlapping(chunk.getMin(), chunk.getMax());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore
        // or unbuffer.
        if (isTracked(chunk)) {
            newTracked.push_back(chunk);
        }
    }

    LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns << " (tracking "
           << newTracked.size() << "), new version is " << *_maxVersion;

    for (const ChunkType& chunk : newTracked) {
        // Invariant enforced by sharding - it's possible to read inconsistent state due to
        // getMore and yielding, so we want to detect it as early as possible.
        //
        // TODO: This checks for overlap, we also should check for holes here iff we're
        // tracking all chunks.
        if (isOverlapping(chunk.getMin(), chunk.getMax())) {
            return -1;
        }

        _currMap->insert(rangeFor(chunk));
    }

    return _validDiffs;
}
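
The two-step idea described in the comments (first bump the tracked maximum versions and evict overlapping ranges, then insert the chunks this node cares about) can be illustrated in isolation. The following is a self-contained sketch under simplified assumptions: integer bounds stand in for BSON keys, a single long long stands in for ChunkVersion, and all names are hypothetical:

// Hypothetical, self-contained sketch of the two-step config diff:
// (1) bump the per-collection and per-shard maximum versions and drop any
// ranges the incoming chunk overlaps, (2) insert the chunks we track locally.
#include <algorithm>
#include <iterator>
#include <map>
#include <string>
#include <vector>

struct DiffChunk {
    int min = 0, max = 0;            // simplified [min, max) bounds
    long long version = 0;           // stand-in for ChunkVersion
    std::string shard;
};

struct DiffState {
    std::map<int, int> ranges;                       // min -> max
    long long maxVersion = 0;
    std::map<std::string, long long> maxShardVersions;
    std::string trackedShard;                        // empty => track everything
};

inline void removeOverlapping(std::map<int, int>& ranges, int min, int max) {
    for (auto it = ranges.begin(); it != ranges.end();) {
        const bool overlap = (min < it->second && it->first < max);
        it = overlap ? ranges.erase(it) : std::next(it);
    }
}

// Returns the number of diffs applied, or -1 on an invalid chunk (version == 0).
inline int applyConfigDiff(DiffState& state, const std::vector<DiffChunk>& chunks) {
    std::vector<const DiffChunk*> newTracked;
    int validDiffs = 0;

    for (const DiffChunk& c : chunks) {
        if (c.version == 0) return -1;                       // invalid version: stop loading
        ++validDiffs;

        state.maxVersion = std::max(state.maxVersion, c.version);
        auto& shardMax = state.maxShardVersions[c.shard];
        shardMax = std::max(shardMax, c.version);

        removeOverlapping(state.ranges, c.min, c.max);       // step 1
        if (state.trackedShard.empty() || c.shard == state.trackedShard)
            newTracked.push_back(&c);
    }

    for (const DiffChunk* c : newTracked)                    // step 2
        state.ranges.emplace(c->min, c->max);

    return validDiffs;
}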
Example #4
void ChunkType::setVersion(const ChunkVersion& version) {
    invariant(version.isSet());
    _version = version;
}
int ConfigDiffTracker<ValType,ShardType>::
calculateConfigDiff( DBClientCursorInterface& diffCursor )
{
    verifyAttached();

    // Apply the chunk changes to the ranges and versions

    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //    per-collection, and remove any conflicting chunks from the ranges
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on the
    //    shard for mongod) add them to the ranges
    //

    vector<BSONObj> newTracked;
    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;
    while( diffCursor.more() ) {

        BSONObj diffChunkDoc = diffCursor.next();

        ChunkVersion chunkVersion = ChunkVersion::fromBSON(diffChunkDoc, ChunkType::DEPRECATED_lastmod());

        if( diffChunkDoc[ChunkType::min()].type() != Object ||
                diffChunkDoc[ChunkType::max()].type() != Object ||
                diffChunkDoc[ChunkType::shard()].type() != String )
        {
            warning() << "got invalid chunk document " << diffChunkDoc
                      << " when trying to load differing chunks" << endl;
            continue;
        }

        if( ! chunkVersion.isSet() || ! chunkVersion.hasCompatibleEpoch( currEpoch ) ) {

            warning() << "got invalid chunk version " << chunkVersion << " in document " << diffChunkDoc
                      << " when trying to load differing chunks at version "
                      << ChunkVersion( _maxVersion->toLong(), currEpoch ) << endl;

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if( chunkVersion > *_maxVersion ) *_maxVersion = chunkVersion;

        // Chunk version changes
        ShardType shard = shardFor( diffChunkDoc[ChunkType::shard()].String() );
        typename map<ShardType, ChunkVersion>::iterator shardVersionIt = _maxShardVersions->find( shard );
        if( shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion ) {
            (*_maxShardVersions)[ shard ] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking b/c of this chunk's changes
        removeOverlapping(diffChunkDoc[ChunkType::min()].Obj(),
                          diffChunkDoc[ChunkType::max()].Obj());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore or unbuffer
        if( isTracked( diffChunkDoc ) ) newTracked.push_back( diffChunkDoc.getOwned() );
    }

    LOG(3) << "found " << _validDiffs
           << " new chunks for collection " << _ns
           << " (tracking " << newTracked.size()
           << "), new version is " << *_maxVersion
           << endl;

    for( vector<BSONObj>::iterator it = newTracked.begin(); it != newTracked.end(); it++ ) {

        BSONObj chunkDoc = *it;

        // Important - we need to make sure we actually own the min and max here
        BSONObj min = chunkDoc[ChunkType::min()].Obj().getOwned();
        BSONObj max = chunkDoc[ChunkType::max()].Obj().getOwned();

        // Invariant enforced by sharding
        // It's possible to read inconsistent state b/c of getMore() and yielding, so we want
        // to detect as early as possible.
        // TODO: This checks for overlap, we also should check for holes here iff we're tracking
        // all chunks
        if( isOverlapping( min, max ) ) return -1;

        _currMap->insert( rangeFor( chunkDoc, min, max ) );
    }

    return _validDiffs;
}
Example #6
    Status MetadataLoader::initChunks( const string& ns,
                                       const string& shard,
                                       const CollectionMetadata* oldMetadata,
                                       CollectionMetadata* metadata )
    {
        map<string, ChunkVersion> versionMap;
        OID epoch = metadata->getCollVersion().epoch();

        // Check to see if we should use the old version or not.
        if ( oldMetadata ) {

            ChunkVersion oldVersion = oldMetadata->getShardVersion();

            if ( oldVersion.isSet() && oldVersion.hasCompatibleEpoch( epoch ) ) {

                // Our epoch for coll version and shard version should be the same.
                verify( oldMetadata->getCollVersion().hasCompatibleEpoch( epoch ) );

                versionMap[shard] = oldMetadata->_shardVersion;
                metadata->_collVersion = oldMetadata->_collVersion;

                // TODO: This could be made more efficient if copying not required, but
                // not as frequently reloaded as in mongos.
                metadata->_chunksMap = oldMetadata->_chunksMap;

                LOG(2) << "loading new chunks for collection " << ns
                              << " using old metadata w/ version " << oldMetadata->getShardVersion()
                              << " and " << metadata->_chunksMap.size() << " chunks" << endl;
            }
        }

        // Exposes the new metadata's range map and version to the "differ," which
        // will ultimately be responsible for filling them in.
        SCMConfigDiffTracker differ( shard );
        differ.attach( ns, metadata->_chunksMap, metadata->_collVersion, versionMap );

        try {

            ScopedDbConnection conn( _configLoc.toString(), 30 );

            auto_ptr<DBClientCursor> cursor = conn->query( ChunkType::ConfigNS,
                                                           differ.configDiffQuery() );

            if ( !cursor.get() ) {
                metadata->_collVersion = ChunkVersion();
                metadata->_chunksMap.clear();
                conn.done();
                return Status( ErrorCodes::HostUnreachable,
                               "problem opening chunk metadata cursor" );
            }

            // Diff tracker should *always* find at least one chunk if this shard owns a chunk.
            int diffsApplied = differ.calculateConfigDiff( *cursor );
            if ( diffsApplied > 0 ) {

                LOG(2) << "loaded " << diffsApplied << " chunks into new metadata for " << ns
                           << " with version " << metadata->_collVersion << endl;

                metadata->_shardVersion = versionMap[shard];
                metadata->fillRanges();
                conn.done();
                return Status::OK();
            }
            else if ( diffsApplied == 0 ) {

                warning() << "no chunks found when reloading " << ns << ", previous version was "
                          << metadata->_collVersion.toString() << endl;

                metadata->_collVersion = ChunkVersion( 0, 0, OID() );
                metadata->_chunksMap.clear();
                conn.done();
                return Status::OK();
            }
            else {

                // TODO: make this impossible by making sure we don't migrate / split on this
                // shard during the reload.  No chunks were found for the ns.

                string errMsg = str::stream() << "invalid chunks found when reloading " << ns
                                              << ", previous version was "
                                              << metadata->_collVersion.toString()
                                              << ", this should be rare";

                warning() << errMsg << endl;

                metadata->_collVersion = ChunkVersion( 0, 0, OID() );
                metadata->_chunksMap.clear();
                conn.done();
                return Status( ErrorCodes::RemoteChangeDetected, errMsg );
            }
        }
        catch ( const DBException& e ) {
            string errMsg = str::stream() << "problem querying chunks metadata" << causedBy( e );

            // We deliberately do not return connPtr to the pool, since it was involved
            // with the error here.

            return Status( ErrorCodes::HostUnreachable, errMsg );
        }
    }
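
A notable part of initChunks is the fast path that seeds the fresh metadata from the previously loaded copy whenever the epochs agree, so that the differ only has to apply chunks newer than what is already known. A small sketch of that idea with simplified, hypothetical types:

// Hypothetical sketch of the "reuse old metadata when epochs match" fast path.
#include <map>
#include <string>

struct SimpleShardMetadata {
    std::string epoch;                 // stand-in for the OID epoch
    long long shardVersion = 0;
    long long collVersion = 0;
    std::map<int, int> chunksMap;      // simplified min -> max ranges
};

// Seeds 'fresh' from 'old' when the epochs agree; returns true if the seed was used.
inline bool seedFromOldMetadata(const SimpleShardMetadata* old,
                                const std::string& currentEpoch,
                                SimpleShardMetadata& fresh) {
    if (!old || old->shardVersion == 0 || old->epoch != currentEpoch)
        return false;                  // incompatible epoch: start from an empty map

    fresh.collVersion = old->collVersion;
    fresh.chunksMap = old->chunksMap;  // the differ only needs to apply newer chunks
    return true;
}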
Example #7
std::shared_ptr<ChunkManager> DBConfig::getChunkManager(OperationContext* txn,
                                                        const string& ns,
                                                        bool shouldReload,
                                                        bool forceReload) {
    BSONObj key;
    ChunkVersion oldVersion;
    std::shared_ptr<ChunkManager> oldManager;

    {
        stdx::lock_guard<stdx::mutex> lk(_lock);

        bool earlyReload = !_collections[ns].isSharded() && (shouldReload || forceReload);
        if (earlyReload) {
            // This is to catch cases where this is a new sharded collection.
            // Note: read the _reloadCount inside the _lock mutex, so _loadIfNeeded will always
            // be forced to perform a reload.
            const auto currentReloadIteration = _reloadCount.load();
            _loadIfNeeded(txn, currentReloadIteration);
        }

        CollectionInfo& ci = _collections[ns];
        uassert(10181, str::stream() << "not sharded:" << ns, ci.isSharded());

        invariant(!ci.key().isEmpty());

        if (!(shouldReload || forceReload) || earlyReload) {
            return ci.getCM();
        }

        key = ci.key().copy();

        if (ci.getCM()) {
            oldManager = ci.getCM();
            oldVersion = ci.getCM()->getVersion();
        }
    }

    invariant(!key.isEmpty());

    // TODO: We need to keep this first one-chunk check in until we have a more efficient way of
    // creating/reusing a chunk manager, as doing so requires copying the full set of chunks
    // currently
    vector<ChunkType> newestChunk;
    if (oldVersion.isSet() && !forceReload) {
        uassertStatusOK(
            grid.catalogClient(txn)->getChunks(txn,
                                               BSON(ChunkType::ns(ns)),
                                               BSON(ChunkType::DEPRECATED_lastmod() << -1),
                                               1,
                                               &newestChunk,
                                               nullptr));

        if (!newestChunk.empty()) {
            invariant(newestChunk.size() == 1);
            ChunkVersion v = newestChunk[0].getVersion();
            if (v.equals(oldVersion)) {
                stdx::lock_guard<stdx::mutex> lk(_lock);
                const CollectionInfo& ci = _collections[ns];
                uassert(15885,
                        str::stream() << "not sharded after reloading from chunks : " << ns,
                        ci.isSharded());
                return ci.getCM();
            }
        }

    } else if (!oldVersion.isSet()) {
        warning() << "version 0 found when " << (forceReload ? "reloading" : "checking")
                  << " chunk manager; collection '" << ns << "' initially detected as sharded";
    }

    // we are not locked now, and want to load a new ChunkManager

    unique_ptr<ChunkManager> tempChunkManager;

    {
        stdx::lock_guard<stdx::mutex> lll(_hitConfigServerLock);

        if (!newestChunk.empty() && !forceReload) {
            // If we have a target we're going for see if we've hit already
            stdx::lock_guard<stdx::mutex> lk(_lock);

            CollectionInfo& ci = _collections[ns];

            if (ci.isSharded() && ci.getCM()) {
                ChunkVersion currentVersion = newestChunk[0].getVersion();

                // Only reload if the version we found is newer than our own in the same epoch
                if (currentVersion <= ci.getCM()->getVersion() &&
                    ci.getCM()->getVersion().hasEqualEpoch(currentVersion)) {
                    return ci.getCM();
                }
            }
        }

        tempChunkManager.reset(new ChunkManager(oldManager->getns(),
                                                oldManager->getShardKeyPattern(),
                                                oldManager->getDefaultCollation(),
                                                oldManager->isUnique()));
        tempChunkManager->loadExistingRanges(txn, oldManager.get());

        if (tempChunkManager->numChunks() == 0) {
            // Maybe we're not sharded any more, so do a full reload
            reload(txn);

            return getChunkManager(txn, ns, false);
        }
    }

    stdx::lock_guard<stdx::mutex> lk(_lock);

    CollectionInfo& ci = _collections[ns];
    uassert(14822, (string) "state changed in the middle: " + ns, ci.isSharded());

    // Reset if our versions aren't the same
    bool shouldReset = !tempChunkManager->getVersion().equals(ci.getCM()->getVersion());

    // Also reset if we're forced to do so
    if (!shouldReset && forceReload) {
        shouldReset = true;
        warning() << "chunk manager reload forced for collection '" << ns << "', config version is "
                  << tempChunkManager->getVersion();
    }

    //
    // LEGACY BEHAVIOR
    //
    // It's possible, when dropping collections, to get into a state where our new version is
    // less than our previous version. Behave identically to legacy mongos, for now, and warn to
    // draw attention to the problem.
    //
    // TODO: Assert in next version, to allow smooth upgrades
    //

    if (shouldReset && tempChunkManager->getVersion() < ci.getCM()->getVersion()) {
        shouldReset = false;

        warning() << "not resetting chunk manager for collection '" << ns << "', config version is "
                  << tempChunkManager->getVersion() << " and "
                  << "old version is " << ci.getCM()->getVersion();
    }

    // end legacy behavior

    if (shouldReset) {
        const auto cmOpTime = tempChunkManager->getConfigOpTime();

        // The existing ChunkManager could have been updated since we last checked, so
        // replace the existing chunk manager only if it is strictly newer.
        // The condition should be (>) than instead of (>=), but use (>=) since legacy non-repl
        // config servers will always have an opTime of zero.
        if (cmOpTime >= ci.getCM()->getConfigOpTime()) {
            ci.resetCM(tempChunkManager.release());
        }
    }

    uassert(
        15883, str::stream() << "not sharded after chunk manager reset : " << ns, ci.isSharded());

    return ci.getCM();
}
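
Before paying for a full chunk-manager rebuild, the code above queries only the single newest chunk and reuses the cached manager if its version has not moved. A hedged sketch of that fast-path decision with simplified types (none of these names are from the mongos code base):

// Hedged sketch of the "compare the newest chunk's version against the cached
// manager's version before reloading" fast path.
#include <optional>
#include <string>

struct CachedVersion {
    long long combined = 0;            // 0 means "not set"
    std::string epoch;
    bool isSet() const { return combined != 0; }
    bool equals(const CachedVersion& o) const {
        return combined == o.combined && epoch == o.epoch;
    }
};

// Returns true when the cached chunk manager can be reused without a reload.
inline bool canReuseCachedManager(const CachedVersion& cachedVersion,
                                  const std::optional<CachedVersion>& newestChunkVersion,
                                  bool forceReload) {
    if (forceReload || !cachedVersion.isSet() || !newestChunkVersion)
        return false;                  // no fast path: a reload is required
    return newestChunkVersion->equals(cachedVersion);
}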
Example #8
    bool MetadataLoader::initChunks(const CollectionType& collDoc,
                                    const string& ns,
                                    const string& shard,
                                    const CollectionManager* oldManager,
                                    CollectionManager* manager,
                                    string* errMsg) {

        map<string,ChunkVersion> versionMap;
        manager->_maxCollVersion = ChunkVersion(0, 0, collDoc.getEpoch());

        // Check to see if we should use the old version or not.
        if (oldManager) {

            ChunkVersion oldVersion = oldManager->getMaxShardVersion();

            if (oldVersion.isSet() && oldVersion.hasCompatibleEpoch(collDoc.getEpoch())) {

                // Our epoch for coll version and shard version should be the same.
                verify(oldManager->getMaxCollVersion().hasCompatibleEpoch(collDoc.getEpoch()));

                versionMap[shard] = oldManager->_maxShardVersion;
                manager->_maxCollVersion = oldManager->_maxCollVersion;

                // TODO: This could be made more efficient if copying not required, but
                // not as frequently reloaded as in mongos.
                manager->_chunksMap = oldManager->_chunksMap;

                LOG(2) << "loading new chunks for collection " << ns
                       << " using old chunk manager w/ version "
                       << oldManager->getMaxShardVersion()
                       << " and " << manager->_chunksMap.size() << " chunks" << endl;
            }
        }

        // Exposes the new manager's range map and version to the "differ," which
        // will ultimately be responsible for filling them in.
        SCMConfigDiffTracker differ(shard);
        differ.attach(ns, manager->_chunksMap, manager->_maxCollVersion, versionMap);

        try {

            scoped_ptr<ScopedDbConnection> connPtr(
                ScopedDbConnection::getInternalScopedDbConnection(_configLoc.toString(), 30));
            ScopedDbConnection& conn = *connPtr;

            auto_ptr<DBClientCursor> cursor = conn->query(ChunkType::ConfigNS,
                                                          differ.configDiffQuery());

            if (!cursor.get()) {
                // 'errMsg' was filled by the getChunkCursor() call.
                manager->_maxCollVersion = ChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }

            // Diff tracker should *always* find at least one chunk if collection exists.
            int diffsApplied = differ.calculateConfigDiff(*cursor);
            if (diffsApplied > 0) {

                LOG(2) << "loaded " << diffsApplied
                       << " chunks into new chunk manager for " << ns
                       << " with version " << manager->_maxCollVersion << endl;

                manager->_maxShardVersion = versionMap[shard];
                manager->fillRanges();
                connPtr->done();
                return true;
            }
            else if(diffsApplied == 0) {

                *errMsg = str::stream() << "no chunks found when reloading " << ns
                                        << ", previous version was "
                                        << manager->_maxCollVersion.toString();

                warning() << *errMsg << endl;

                manager->_maxCollVersion = ChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }
            else{

                // TODO: make this impossible by making sure we don't migrate / split on this
                // shard during the reload.  No chunks were found for the ns.

                *errMsg = str::stream() << "invalid chunks found when reloading " << ns
                                        << ", previous version was "
                                        << manager->_maxCollVersion.toString()
                                        << ", this should be rare";

                warning() << *errMsg << endl;

                manager->_maxCollVersion = ChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }
        }
        catch (const DBException& e) {
            *errMsg = str::stream() << "caught exception accessing the config servers"
                                    << causedBy(e);

            // We deliberately do not return connPtr to the pool, since it was involved
            // with the error here.

            return false;
        }
    }
Example #9
    void WriteBackListener::run() {

        int secsToSleep = 0;
        scoped_ptr<ChunkVersion> lastNeededVersion;
        int lastNeededCount = 0;
        bool needsToReloadShardInfo = false;

        while ( ! inShutdown() ) {

            if ( ! Shard::isAShardNode( _addr ) ) {
                LOG(1) << _addr << " is not a shard node" << endl;
                sleepsecs( 60 );
                continue;
            }

            try {
                if (needsToReloadShardInfo) {
                    // It's possible this shard was removed
                    Shard::reloadShardInfo();
                    needsToReloadShardInfo = false;
                }

                scoped_ptr<ScopedDbConnection> conn(
                        ScopedDbConnection::getInternalScopedDbConnection( _addr ) );

                BSONObj result;

                {
                    BSONObjBuilder cmd;
                    cmd.appendOID( "writebacklisten" , &serverID ); // Command will block for data
                    if ( ! conn->get()->runCommand( "admin" , cmd.obj() , result ) ) {
                        result = result.getOwned();
                        log() <<  "writebacklisten command failed!  "  << result << endl;
                        conn->done();
                        continue;
                    }

                }
                conn->done();

                LOG(1) << "writebacklisten result: " << result << endl;

                BSONObj data = result.getObjectField( "data" );
                if ( data.getBoolField( "writeBack" ) ) {
                    string ns = data["ns"].valuestrsafe();

                    ConnectionIdent cid( "" , 0 );
                    OID wid;
                    if ( data["connectionId"].isNumber() && data["id"].type() == jstOID ) {
                        string s = "";
                        if ( data["instanceIdent"].type() == String )
                            s = data["instanceIdent"].String();
                        cid = ConnectionIdent( s , data["connectionId"].numberLong() );
                        wid = data["id"].OID();
                    }
                    else {
                        warning() << "mongos/mongod version mismatch (1.7.5 is the split)" << endl;
                    }

                    int len; // not used, but needed for next call
                    Message msg( (void*)data["msg"].binData( len ) , false );
                    massert( 10427 ,  "invalid writeback message" , msg.header()->valid() );

                    DBConfigPtr db = grid.getDBConfig( ns );
                    ChunkVersion needVersion = ChunkVersion::fromBSON( data, "version" );

                    //
                    // TODO: Refactor the sharded strategy to correctly handle all sharding state changes itself,
                    // we can't rely on WBL to do this for us b/c anything could reset our state in-between.
                    // We should always reload here for efficiency when possible, but staleness is also caught in the
                    // loop below.
                    //

                    ChunkManagerPtr manager;
                    ShardPtr primary;
                    db->getChunkManagerOrPrimary( ns, manager, primary );

                    ChunkVersion currVersion;
                    if( manager ) currVersion = manager->getVersion();

                    LOG(1) << "connectionId: " << cid << " writebackId: " << wid << " needVersion : " << needVersion.toString()
                           << " mine : " << currVersion.toString() << endl;

                    LOG(1) << msg.toString() << endl;

                    //
                    // We should reload only if we need to update our version to be compatible *and* we
                    // haven't already done so.  This avoids lots of reloading when we remove/add a sharded collection
                    //

                    bool alreadyReloaded = lastNeededVersion &&
                                           lastNeededVersion->isEquivalentTo( needVersion );

                    if( alreadyReloaded ){

                        LOG(1) << "wbl already reloaded config information for version "
                               << needVersion << ", at version " << currVersion << endl;
                    }
                    else if( lastNeededVersion ) {

                        log() << "new version change detected to " << needVersion.toString()
                              << ", " << lastNeededCount << " writebacks processed at "
                              << lastNeededVersion->toString() << endl;

                        lastNeededCount = 0;
                    }

                    //
                    // Set our lastNeededVersion for next time
                    //

                    lastNeededVersion.reset( new ChunkVersion( needVersion ) );
                    lastNeededCount++;

                    //
                    // Determine if we should reload, if so, reload
                    //

                    bool shouldReload = ! needVersion.isWriteCompatibleWith( currVersion ) &&
                                        ! alreadyReloaded;

                    if( shouldReload && currVersion.isSet()
                                     && needVersion.isSet()
                                     && currVersion.hasCompatibleEpoch( needVersion ) )
                    {

                        //
                        // If we disagree about versions only, reload the chunk manager
                        //

                        db->getChunkManagerIfExists( ns, true );
                    }
                    else if( shouldReload ){

                        //
                        // If we disagree about anything else, reload the full db
                        //

                        warning() << "reloading config data for " << db->getName() << ", "
                                  << "wanted version " << needVersion.toString()
                                  << " but currently have version " << currVersion.toString() << endl;

                        db->reload();
                    }

                    // do request and then call getLastError
                    // we have to call getLastError so we can return the right fields to the user if they decide to call getLastError

                    BSONObj gle;
                    int attempts = 0;
                    while ( true ) {
                        attempts++;

                        try {

                            Request r( msg , 0 );
                            r.init();

                            r.d().reservedField() |= Reserved_FromWriteback;

                            ClientInfo * ci = r.getClientInfo();
                            if (!noauth) {
                                ci->getAuthorizationManager()->grantInternalAuthorization(
                                        "_writebackListener");
                            }
                            ci->noAutoSplit();

                            r.process( attempts );

                            ci->newRequest(); // this so we flip prev and cur shards

                            BSONObjBuilder b;
                            string errmsg;
                            if ( ! ci->getLastError( "admin",
                                                     BSON( "getLastError" << 1 ),
                                                     b,
                                                     errmsg,
                                                     true ) )
                            {
                                b.appendBool( "commandFailed" , true );
                                if( ! b.hasField( "errmsg" ) ){

                                    b.append( "errmsg", errmsg );
                                    gle = b.obj();
                                }
                                else if( errmsg.size() > 0 ){

                                    // Rebuild GLE object with errmsg
                                    // TODO: Make this less clumsy by improving GLE interface
                                    gle = b.obj();

                                    if( gle["errmsg"].type() == String ){

                                        BSONObj gleNoErrmsg =
                                                gle.filterFieldsUndotted( BSON( "errmsg" << 1 ),
                                                                          false );
                                        BSONObjBuilder bb;
                                        bb.appendElements( gleNoErrmsg );
                                        bb.append( "errmsg", gle["errmsg"].String() +
                                                             " ::and:: " +
                                                             errmsg );
                                        gle = bb.obj().getOwned();
                                    }
                                }
                            }
                            else{
                                gle = b.obj();
                            }

                            if ( gle["code"].numberInt() == 9517 ) {

                                log() << "new version change detected, "
                                      << lastNeededCount << " writebacks processed previously" << endl;

                                lastNeededVersion.reset();
                                lastNeededCount = 1;

                                log() << "writeback failed because of stale config, retrying attempts: " << attempts << endl;
                                LOG(1) << "writeback error : " << gle << endl;

                                //
                                // Bringing this in line with the similar retry logic elsewhere
                                //
                                // TODO: Reloading the chunk manager may not help if we dropped a
                                // collection, but we don't actually have that info in the writeback
                                // error
                                //

                                if( attempts <= 2 ){
                                    db->getChunkManagerIfExists( ns, true );
                                }
                                else{
                                    versionManager.forceRemoteCheckShardVersionCB( ns );
                                    sleepsecs( attempts - 1 );
                                }

                                uassert( 15884, str::stream()
                                         << "Could not reload chunk manager after "
                                         << attempts << " attempts.", attempts <= 4 );

                                continue;
                            }

                            ci->clearSinceLastGetError();
                        }
                        catch ( DBException& e ) {
                            error() << "error processing writeback: " << e << endl;
                            BSONObjBuilder b;
                            e.getInfo().append( b, "err", "code" );
                            gle = b.obj();
                        }

                        break;
                    }

                    {
                        scoped_lock lk( _seenWritebacksLock );
                        WBStatus& s = _seenWritebacks[cid];
                        s.id = wid;
                        s.gle = gle;
                    }
                }
                else if ( result["noop"].trueValue() ) {
                    // no-op
                }
                else {
                    log() << "unknown writeBack result: " << result << endl;
                }

                secsToSleep = 0;
                continue;
            }
            catch ( std::exception& e ) {
                // Attention! Do not call any method that would throw an exception
                // (or assert) in this block.

                if ( inShutdown() ) {
                    // we're shutting down, so just clean up
                    return;
                }

                log() << "WriteBackListener exception : " << e.what() << endl;

                needsToReloadShardInfo = true;
            }
            catch ( ... ) {
                log() << "WriteBackListener uncaught exception!" << endl;
            }
            secsToSleep++;
            sleepsecs(secsToSleep);
            if ( secsToSleep > 10 )
                secsToSleep = 0;
        }

        log() << "WriteBackListener exiting : address no longer in cluster " << _addr;

    }
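
The reload decision buried in the middle of the loop follows a three-way rule: do nothing if the needed version is already write compatible (or we already reloaded for it), refresh just the chunk manager if only the version numbers disagree within the same epoch, and reload the whole database config otherwise. A compact, hypothetical sketch of that rule:

// Hypothetical sketch of the writeback reload decision, with a deliberately
// simplified notion of "write compatible" (same major version and epoch).
#include <string>

enum class ReloadAction { None, ChunkManagerOnly, FullDatabase };

struct WbVersion {
    int major = 0, minor = 0;
    std::string epoch;
    bool isSet() const { return major != 0 || minor != 0; }
    bool writeCompatibleWith(const WbVersion& o) const {
        return major == o.major && epoch == o.epoch;   // simplified rule
    }
};

inline ReloadAction decideReload(const WbVersion& needVersion,
                                 const WbVersion& currVersion,
                                 bool alreadyReloaded) {
    if (alreadyReloaded || needVersion.writeCompatibleWith(currVersion))
        return ReloadAction::None;
    if (currVersion.isSet() && needVersion.isSet() && currVersion.epoch == needVersion.epoch)
        return ReloadAction::ChunkManagerOnly;   // versions disagree, same epoch
    return ReloadAction::FullDatabase;           // disagree about more than the version
}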
Example #10
        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) ) 
                return false;

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
            }
            
            // Handle initial shard connection
            if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){

                result.append( "initialized", true );

                // Send back wire version to let mongos know what protocol we can speak
                result.append( "minWireVersion", minWireVersion );
                result.append( "maxWireVersion", maxWireVersion );

                return true;
            }

            // we can run on a slave up to here
            if ( ! isMaster( "admin" ) ) {
                result.append( "errmsg" , "not master" );
                result.append( "note" , "from post init in setShardVersion" );
                return false;
            }

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify namespace";
                return false;
            }

            if( ! ChunkVersion::canParseBSON( cmdObj, "version" ) ){
                errmsg = "need to specify version";
                return false;
            }

            const ChunkVersion version = ChunkVersion::fromBSON( cmdObj, "version" );
            
            // step 3

            const ChunkVersion oldVersion = info->getVersion(ns);
            const ChunkVersion globalVersion = shardingState.getVersion(ns);

            oldVersion.addToBSON( result, "oldVersion" );
            
            if ( globalVersion.isSet() && version.isSet() ) {
                // this means there is no reset going on on either side,
                // so it's safe to make some assumptions

                if ( version.isWriteCompatibleWith( globalVersion ) ) {
                    // mongos and mongod agree!
                    if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                        if ( oldVersion < globalVersion &&
                             oldVersion.hasCompatibleEpoch(globalVersion) )
                        {
                            info->setVersion( ns , version );
                        }
                        else if ( authoritative ) {
                            // this means there was a drop and our version is reset
                            info->setVersion( ns , version );
                        }
                        else {
                            result.append( "ns" , ns );
                            result.appendBool( "need_authoritative" , true );
                            errmsg = "verifying drop on '" + ns + "'";
                            return false;
                        }
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
                // this had been reset
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
            }

            if ( ! version.isSet() && ! globalVersion.isSet() ) {
                // this connection is cleaning itself
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            // Cases below all either return OR fall-through to remote metadata reload.
            if ( version.isSet() || !globalVersion.isSet() ) {

                // Not Dropping

                // TODO: Refactor all of this
                if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
                    errmsg = "this connection already had a newer version of collection '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "newVersion" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    return false;
                }

                // TODO: Refactor all of this
                if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }
                    errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "version" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    result.appendBool( "reloadConfig" , true );
                    return false;
                }

                if ( ! globalVersion.isSet() && ! authoritative ) {
                    // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
                    // should require a reload.
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }

                    // need authoritative for first look
                    result.append( "ns" , ns );
                    result.appendBool( "need_authoritative" , true );
                    errmsg = "first time for collection '" + ns + "'";
                    return false;
                }

                // Fall through to metadata reload below
            }
            else {

                // Dropping

                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    globalVersion.addToBSON( result, "globalVersion" );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }

                // Fall through to metadata reload below
            }

            ChunkVersion currVersion;
            Status status = shardingState.refreshMetadataIfNeeded( ns, version, &currVersion );

            if (!status.isOK()) {

                // The reload itself was interrupted or confused here

                errmsg = str::stream() << "could not refresh metadata for " << ns
                                       << " with requested shard version " << version.toString()
                                       << ", stored shard version is " << currVersion.toString()
                                       << causedBy( status.reason() );

                warning() << errmsg << endl;

                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                currVersion.addToBSON( result, "globalVersion" );
                result.appendBool( "reloadConfig", true );

                return false;
            }
            else if ( !version.isWriteCompatibleWith( currVersion ) ) {

                // We reloaded a version that doesn't match the version mongos was trying to
                // set.

                errmsg = str::stream() << "requested shard version differs from"
                                       << " config shard version for " << ns
                                       << ", requested version is " << version.toString()
                                       << " but found version " << currVersion.toString();

                OCCASIONALLY warning() << errmsg << endl;

                // WARNING: the exact fields below are important for compatibility with mongos
                // version reload.

                result.append( "ns" , ns );
                currVersion.addToBSON( result, "globalVersion" );

                // If this was a reset of a collection or the last chunk moved out, inform mongos to
                // do a full reload.
                if (currVersion.epoch() != version.epoch() || !currVersion.isSet() ) {
                    result.appendBool( "reloadConfig", true );
                    // Zero-version also needed to trigger full mongos reload, sadly
                    // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
                    ChunkVersion( 0, 0, OID() ).addToBSON( result, "version" );
                    // For debugging
                    version.addToBSON( result, "origVersion" );
                }
                else {
                    version.addToBSON( result, "version" );
                }

                return false;
            }

            info->setVersion( ns , version );
            return true;
        }
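
Step 3 above is a lock-free fast check: when both the requested version and the shard's global version are set and write compatible, the command can finish without reloading metadata, possibly updating the per-connection version along the way. A hedged sketch of that decision with simplified types and hypothetical names:

// Hedged sketch of the setShardVersion "fast check"; the compatibility rules
// here are simplified stand-ins for the real ChunkVersion semantics.
enum class FastCheckResult { Accept, UpdateConnectionVersion, NeedAuthoritative, FallThrough };

struct SsvVersion {
    int major = 0, minor = 0;
    long long epoch = 0;
    bool isSet() const { return major != 0 || minor != 0; }
    bool writeCompatibleWith(const SsvVersion& o) const {
        return major == o.major && epoch == o.epoch;    // simplified rule
    }
    bool olderThan(const SsvVersion& o) const {
        return epoch == o.epoch &&
               (major < o.major || (major == o.major && minor < o.minor));
    }
};

inline FastCheckResult fastCheck(const SsvVersion& requested,
                                 const SsvVersion& globalVersion,
                                 const SsvVersion& connVersion,
                                 bool authoritative) {
    if (!requested.isSet() || !globalVersion.isSet())
        return FastCheckResult::FallThrough;             // a reset is in progress somewhere
    if (!requested.writeCompatibleWith(globalVersion))
        return FastCheckResult::FallThrough;             // mongos and mongod disagree
    if (connVersion.writeCompatibleWith(requested))
        return FastCheckResult::Accept;                  // nothing to update
    if (connVersion.olderThan(globalVersion) || authoritative)
        return FastCheckResult::UpdateConnectionVersion; // safe to bump this connection
    return FastCheckResult::NeedAuthoritative;           // a drop must be verified first
}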
Example #11
    /**
     * @return true if not in sharded mode or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) {

        if ( ! shardingState.enabled() )
            return true;

        if ( ! isMasterNs( ns.c_str() ) )  {
            // right now connections to secondaries aren't versioned at all
            return true;
        }

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ) {
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which I think is the correct behavior
            return true;
        }

        if ( info->inForceVersionOkMode() ) {
            return true;
        }

        // TODO: at some point all collections, sharded or not, will have a version
        //  (and a CollectionMetadata)
        received = info->getVersion( ns );
        wanted = shardingState.getVersion( ns );

        if( received.isWriteCompatibleWith( wanted ) ) return true;

        //
        // Figure out exactly why not compatible, send appropriate error message
        // The versions themselves are returned in the error, so not needed in messages here
        //

        // Check epoch first, to send more meaningful message, since other parameters probably
        // won't match either
        if( ! wanted.hasCompatibleEpoch( received ) ){
            errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", "
                                   << "the collection may have been dropped and recreated";
            return false;
        }

        if( ! wanted.isSet() && received.isSet() ){
            errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", "
                                   << "the collection may have been dropped";
            return false;
        }

        if( wanted.isSet() && ! received.isSet() ){
            errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", "
                                   << "but no version set in request";
            return false;
        }

        if( wanted.majorVersion() != received.majorVersion() ){

            //
            // Could be > or < - wanted is > if this is the source of a migration,
            // wanted < if this is the target of a migration
            //

            errmsg = str::stream() << "version mismatch detected for " << ns << ", "
                                   << "stored major version " << wanted.majorVersion()
                                   << " does not match received " << received.majorVersion();
            return false;
        }

        // Those are all the reasons the versions can mismatch
        verify( false );

        return false;

    }
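
shardVersionOk falls through a fixed sequence of checks to explain why the received and wanted versions are incompatible: epoch mismatch first, then one-sided unset versions, then a major-version mismatch. A compact sketch of that diagnosis cascade with a hypothetical helper and a simplified version type:

// Hypothetical helper mirroring the mismatch-diagnosis order used above.
#include <string>

struct ShardVer {
    int major = 0, minor = 0;
    std::string epoch;
    bool isSet() const { return major != 0 || minor != 0; }
};

inline std::string explainVersionMismatch(const std::string& ns,
                                          const ShardVer& received,
                                          const ShardVer& wanted) {
    if (wanted.epoch != received.epoch)
        return "version epoch mismatch for " + ns +
               ", the collection may have been dropped and recreated";
    if (!wanted.isSet() && received.isSet())
        return "this shard no longer contains chunks for " + ns;
    if (wanted.isSet() && !received.isSet())
        return "this shard contains versioned chunks for " + ns +
               ", but no version was sent with the request";
    if (wanted.major != received.major)
        return "major version mismatch for " + ns +
               " (a migration may be in progress)";
    return "";                                   // versions should be compatible
}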
Example #12
        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) ) 
                return false;

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }
            

            // Handle initial shard connection
            if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){
                result.append( "initialized", true );
                return true;
            }

            // we can run on a slave up to here
            if ( ! isMaster( "admin" ) ) {
                result.append( "errmsg" , "not master" );
                result.append( "note" , "from post init in setShardVersion" );
                return false;
            }

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify namespace";
                return false;
            }

            if( ! ConfigVersion::canParseBSON( cmdObj, "version" ) ){
                errmsg = "need to specify version";
                return false;
            }

            const ConfigVersion version = ConfigVersion::fromBSON( cmdObj, "version" );
            
            // step 3

            const ConfigVersion oldVersion = info->getVersion(ns);
            const ConfigVersion globalVersion = shardingState.getVersion(ns);

            oldVersion.addToBSON( result, "oldVersion" );
            
            if ( globalVersion.isSet() && version.isSet() ) {
                // this means there is no reset going on on either side,
                // so it's safe to make some assumptions

                if ( version.isWriteCompatibleWith( globalVersion ) ) {
                    // mongos and mongod agree!
                    if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                        if ( oldVersion < globalVersion &&
                             oldVersion.hasCompatibleEpoch(globalVersion) )
                        {
                            info->setVersion( ns , version );
                        }
                        else if ( authoritative ) {
                            // this means there was a drop and our version is reset
                            info->setVersion( ns , version );
                        }
                        else {
                            result.append( "ns" , ns );
                            result.appendBool( "need_authoritative" , true );
                            errmsg = "verifying drop on '" + ns + "'";
                            return false;
                        }
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            Lock::GlobalWrite setShardVersionLock; // TODO: can we get rid of this??
            
            if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
                // this had been reset
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
            }

            if ( ! version.isSet() && ! globalVersion.isSet() ) {
                // this connection is cleaning itself
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            if ( ! version.isSet() && globalVersion.isSet() ) {
                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    globalVersion.addToBSON( result, "globalVersion" );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }
                log() << "wiping data for: " << ns << endl;
                globalVersion.addToBSON( result, "beforeDrop" );
                // only setting global version on purpose
                // need clients to re-find meta-data
                shardingState.resetVersion( ns );
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            // TODO: Refactor all of this
            if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
                errmsg = "this connection already had a newer version of collection '" + ns + "'";
                result.append( "ns" , ns );
                version.addToBSON( result, "newVersion" );
                globalVersion.addToBSON( result, "globalVersion" );
                return false;
            }

            // TODO: Refactor all of this
            if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
                while ( shardingState.inCriticalMigrateSection() ) {
                    log() << "waiting till out of critical section" << endl;
                    dbtemprelease r;
                    shardingState.waitTillNotInCriticalSection( 10 );
                }
                errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                globalVersion.addToBSON( result, "globalVersion" );
                result.appendBool( "reloadConfig" , true );
                return false;
            }

            if ( ! globalVersion.isSet() && ! authoritative ) {
                // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
                // should require a reload.
                while ( shardingState.inCriticalMigrateSection() ) {
                    log() << "waiting till out of critical section" << endl;
                    dbtemprelease r;
                    shardingState.waitTillNotInCriticalSection( 10 );
                }

                // need authoritative for first look
                result.append( "ns" , ns );
                result.appendBool( "need_authoritative" , true );
                errmsg = "first time for collection '" + ns + "'";
                return false;
            }

            Timer relockTime;
            {
                dbtemprelease unlock;

                ChunkVersion currVersion = version;
                if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                    errmsg = str::stream() << "client version differs from config's for collection '" << ns << "'";
                    result.append( "ns" , ns );

                    // If this was a reset of a collection, inform mongos to do a full reload
                    if( ! currVersion.isSet()  ){
                        ConfigVersion( 0, OID() ).addToBSON( result, "version" );
                        result.appendBool( "reloadConfig", true );
                    }
                    else{
                        version.addToBSON( result, "version" );
                    }

                    globalVersion.addToBSON( result, "globalVersion" );
                    return false;
                }
            }
            if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) {
                log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl;
            }
            
            info->setVersion( ns , version );
            return true;
        }
Example #13
0
    /**
     * @return true if a setShardVersion had to be sent for this connection; false if nothing
     *         needed to be done
     */
    bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager, bool authoritative , int tryNumber ) {
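        // Overall flow: compare this connection's cached chunk manager sequence number with the
        // current one; if they differ, send setShardVersion to the shard, retrying on failure.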
        // TODO: cache, optimize, etc...

        WriteBackListener::init( *conn_in );

        DBConfigPtr conf = grid.getDBConfig( ns );
        if ( ! conf )
            return false;

        DBClientBase* conn = getVersionable( conn_in );
        verify(conn); // errors thrown above

        unsigned long long officialSequenceNumber = 0;

        ChunkManagerPtr manager;
        const bool isSharded = conf->isSharded( ns );
        if ( isSharded ) {
            manager = conf->getChunkManagerIfExists( ns , authoritative );
            // It's possible the chunk manager was reset since we checked whether the collection
            // was sharded, so we must check again here.
            if( manager ) officialSequenceNumber = manager->getSequenceNumber();
        }

        // Check this manager against the reference manager
        if( isSharded && manager ){

            Shard shard = Shard::make( conn->getServerAddress() );
            if( refManager && ! refManager->compatibleWith( manager, shard ) ){
                throw SendStaleConfigException( ns, str::stream() << "manager (" << manager->getVersion( shard ).toString()  << " : " << manager->getSequenceNumber() << ") "
                                                                      << "not compatible with reference manager (" << refManager->getVersion( shard ).toString()  << " : " << refManager->getSequenceNumber() << ") "
                                                                      << "on shard " << shard.getName() << " (" << shard.getAddress().toString() << ")",
                                                refManager->getVersion( shard ), manager->getVersion( shard ) );
            }
        }
        else if( refManager ){
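            // We have a reference manager for this namespace but the collection is not (or no
            // longer) sharded according to the current config; make the caller treat its config as stale.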
            Shard shard = Shard::make( conn->getServerAddress() );
            string msg( str::stream() << "not sharded ("
                        << ( (manager.get() == 0) ? string( "<none>" ) :
                                str::stream() << manager->getSequenceNumber() )
                        << ") but has reference manager ("
                        << refManager->getSequenceNumber() << ") "
                        << "on conn " << conn->getServerAddress() << " ("
                        << conn_in->getServerAddress() << ")" );

            throw SendStaleConfigException( ns, msg,
                    refManager->getVersion( shard ), ChunkVersion( 0, OID() ));
        }

        // Has the ChunkManager been reloaded since the last time we updated the connection-level
        // version (i.e., the last time we issued the setShardVersion below)?
        unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
        if ( sequenceNumber == officialSequenceNumber ) {
            return false;
        }


        ChunkVersion version = ChunkVersion( 0, OID() );
        if ( isSharded && manager ) {
            version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
        }

        if( ! version.isSet() ){
            LOG(0) << "resetting shard version of " << ns << " on " << conn->getServerAddress() << ", " <<
                      ( ! isSharded ? "no longer sharded" :
                      ( ! manager ? "no chunk manager found" :
                                    "version is zero" ) ) << endl;
        }

        LOG(2) << " have to set shard version for conn: " << conn->getServerAddress() << " ns:" << ns
               << " my last seq: " << sequenceNumber << "  current: " << officialSequenceNumber
               << " version: " << version << " manager: " << manager.get()
               << endl;

        const string versionableServerAddress(conn->getServerAddress());

        BSONObj result;
        if ( setShardVersion( *conn , ns , version , manager , authoritative , result ) ) {
            // success!
            LOG(1) << "      setShardVersion success: " << result << endl;
            connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
            return true;
        }

        LOG(1) << "       setShardVersion failed!\n" << result << endl;

        if ( result["need_authoritative"].trueValue() )
            massert( 10428 ,  "need_authoritative set but in authoritative mode already" , ! authoritative );

        if ( ! authoritative ) {
            // use the original connection and get a fresh versionable connection
            // since conn can be invalidated (or worse, freed) after the failure
            checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
            return true;
        }
        
        if ( result["reloadConfig"].trueValue() ) {
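            // The shard asked for a config reload: a zero version timestamp indicates a larger
            // change, so reload the whole database config; otherwise just this collection's
            // chunk manager.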
            if( result["version"].timestampTime() == 0 ){

                warning() << "reloading full configuration for " << conf->getName()
                          << ", connection state indicates significant version changes" << endl;

                // reload db
                conf->reload();
            }
            else {
                // reload config
                conf->getChunkManager( ns , true );
            }
        }

        const int maxNumTries = 7;
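        // Retry with a short linear backoff before giving up.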
        if ( tryNumber < maxNumTries ) {
            LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 ) 
                << "going to retry checkShardVersion host: " << versionableServerAddress << " " << result << endl;
            sleepmillis( 10 * tryNumber );
            // use the original connection and get a fresh versionable connection
            // since conn can be invalidated (or worse, freed) after the failure
            checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
            return true;
        }
        
        string errmsg = str::stream() << "setShardVersion failed host: " << versionableServerAddress << " " << result;
        log() << "     " << errmsg << endl;
        massert( 10429 , errmsg , 0 );
        return true;
    }