Example no. 1
 CollectionManager* MetadataLoader::makeEmptyCollectionManager() {
     CollectionManager* manager = new CollectionManager;
     manager->_maxCollVersion = ChunkVersion(1, 0, OID());
     manager->_maxShardVersion = ChunkVersion(1, 0, OID());
     dassert(manager->isValid());
     return manager;
 }
Example no. 2
/**
 * Returns true if request is a query for sharded indexes.
 */
static bool doShardedIndexQuery(OperationContext* txn, Request& r, const QuerySpec& qSpec) {
    // Extract the ns field from the query, which may be embedded within the "query" or
    // "$query" field.
    auto nsField = qSpec.filter()["ns"];
    if (nsField.eoo()) {
        return false;
    }
    const NamespaceString indexNSSQuery(nsField.str());

    auto status = grid.catalogCache()->getDatabase(txn, indexNSSQuery.db().toString());
    if (!status.isOK()) {
        return false;
    }

    shared_ptr<DBConfig> config = status.getValue();
    if (!config->isSharded(indexNSSQuery.ns())) {
        return false;
    }

    // if you are querying on system.indexes, we need to make sure we go to a shard
    // that actually has chunks. This is not a perfect solution (what if you just
    // look at all indexes), but better than doing nothing.

    ShardPtr shard;
    ChunkManagerPtr cm;
    config->getChunkManagerOrPrimary(indexNSSQuery.ns(), cm, shard);
    if (cm) {
        set<ShardId> shardIds;
        cm->getAllShardIds(&shardIds);
        verify(shardIds.size() > 0);
        shard = grid.shardRegistry()->getShard(*shardIds.begin());
    }

    ShardConnection dbcon(shard->getConnString(), r.getns());
    DBClientBase& c = dbcon.conn();

    string actualServer;

    Message response;
    bool ok = c.call(r.m(), response, true, &actualServer);
    uassert(10200, "mongos: error calling db", ok);

    {
        QueryResult::View qr = response.singleData().view2ptr();
        if (qr.getResultFlags() & ResultFlag_ShardConfigStale) {
            dbcon.done();
            // Version is zero b/c this is a deprecated codepath
            throw RecvStaleConfigException(r.getns(),
                                           "Strategy::doQuery",
                                           ChunkVersion(0, 0, OID()),
                                           ChunkVersion(0, 0, OID()));
        }
    }

    r.reply(response, actualServer.size() ? actualServer : c.getServerAddress());
    dbcon.done();

    return true;
}
Example no. 3
    /**
     * Returns true if request is a query for sharded indexes.
     */
    static bool doShardedIndexQuery( Request& r, const QuerySpec& qSpec ) {

        // Extract the ns field from the query, which may be embedded within the "query" or
        // "$query" field.
        string indexNSQuery(qSpec.filter()["ns"].str());
        DBConfigPtr config = grid.getDBConfig( r.getns() );

        if ( !config->isSharded( indexNSQuery )) {
            return false;
        }

        // if you are querying on system.indexes, we need to make sure we go to a shard
        // that actually has chunks. This is not a perfect solution (what if you just
        // look at all indexes), but better than doing nothing.

        ShardPtr shard;
        ChunkManagerPtr cm;
        config->getChunkManagerOrPrimary( indexNSQuery, cm, shard );
        if ( cm ) {
            set<Shard> shards;
            cm->getAllShards( shards );
            verify( shards.size() > 0 );
            shard.reset( new Shard( *shards.begin() ) );
        }

        ShardConnection dbcon( *shard , r.getns() );
        DBClientBase &c = dbcon.conn();

        string actualServer;

        Message response;
        bool ok = c.call( r.m(), response, true , &actualServer );
        uassert( 10200 , "mongos: error calling db", ok );

        {
            QueryResult *qr = (QueryResult *) response.singleData();
            if ( qr->resultFlags() & ResultFlag_ShardConfigStale ) {
                dbcon.done();
                // Version is zero b/c this is a deprecated codepath
                throw RecvStaleConfigException( r.getns(),
                                                "Strategy::doQuery",
                                                ChunkVersion( 0, 0, OID() ),
                                                ChunkVersion( 0, 0, OID() ));
            }
        }

        r.reply( response , actualServer.size() ? actualServer : c.getServerAddress() );
        dbcon.done();

        return true;
    }
    ShardChunkManager* ShardChunkManager::cloneMinus( const BSONObj& min, const BSONObj& max, const ChunkVersion& version ) {

        // check that we have the exact chunk that will be subtracted
        _assertChunkExists( min , max );

        auto_ptr<ShardChunkManager> p( new ShardChunkManager );
        p->_key = this->_key;

        if ( _chunksMap.size() == 1 ) {
            // if left with no chunks, just reset version
            uassert( 13590 , str::stream() << "setting version to " << version.toString() << " on removing last chunk", ! version.isSet() );

            p->_version = ChunkVersion( 0, OID() );
            p->_collVersion = _collVersion;

        }
        else {
            // can't move version backwards when subtracting chunks
            // this is what guarantees that no read or write would be taken once we subtract data from the current shard
            if ( version <= _version ) {
                uasserted( 13585 , str::stream() << "version " << version.toString() << " not greater than " << _version.toString() );
            }

            p->_chunksMap = this->_chunksMap;
            p->_chunksMap.erase( min );
            p->_version = version;
            if( version > _collVersion ) p->_collVersion = version;
            else p->_collVersion = this->_collVersion;
            p->_fillRanges();
        }

        return p.release();
    }
Example no. 5
    void ShardingState::donateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ChunkVersion version ) {
        scoped_lock lk( _mutex );

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        verify( it != _collMetadata.end() ) ;
        CollectionMetadataPtr p = it->second;

        // empty shards should have version 0
        version =
                ( p->getNumChunks() > 1 ) ?
                        version : ChunkVersion( 0, 0, p->getCollVersion().epoch() );

        ChunkType chunk;
        chunk.setMin( min );
        chunk.setMax( max );
        string errMsg;

        CollectionMetadataPtr cloned( p->cloneMigrate( chunk, version, &errMsg ) );
        // uassert to match old behavior, TODO: report errors w/o throwing
        uassert( 16855, errMsg, NULL != cloned.get() );

        // TODO: a bit dangerous to have two different zero-version states - no-metadata and
        // no-version
        _collMetadata[ns] = cloned;
    }
const ChunkVersion ShardedConnectionInfo::getVersion(const std::string& ns) const {
    NSVersionMap::const_iterator it = _versions.find(ns);
    if (it != _versions.end()) {
        return it->second;
    } else {
        return ChunkVersion(0, 0, OID());
    }
}
Example no. 7
ChunkManager::ChunkManager(const CollectionType& coll)
    : _ns(coll.getNs().ns()),
      _keyPattern(coll.getKeyPattern()),
      _unique(coll.getUnique()),
      _sequenceNumber(NextSequenceNumber.addAndFetch(1)),
      _chunkRanges() {
    // coll does not have correct version. Use same initial version as _load and createFirstChunks.
    _version = ChunkVersion(0, 0, coll.getEpoch());
}
Example no. 8
ChunkVersion ChunkManager::getVersion(const std::string& shardName) const {
    ShardVersionMap::const_iterator i = _shardVersions.find(shardName);
    if (i == _shardVersions.end()) {
        // Shards without explicitly tracked shard versions (meaning they have
        // no chunks) always have a version of (0, 0, epoch).  Note this is
        // *different* from the dropped chunk version of (0, 0, OID(000...)).
        // See s/chunk_version.h.
        return ChunkVersion(0, 0, _version.epoch());
    }
    return i->second;
}
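The comment above distinguishes two zero versions: a shard that simply owns no chunks reports (0, 0, epoch), while a dropped or untracked collection carries (0, 0, OID()). Below is a minimal sketch of how calling code could tell the two apart, assuming ChunkVersion::isSet() and OID::isSet() behave as they appear to elsewhere in these examples; the helper is illustrative and not part of the original source.

// Illustrative sketch only: major/minor are zero in both states, so only the
// epoch distinguishes "this shard owns no chunks" from "dropped/untracked".
// Assumes ChunkVersion::isSet() is false when major and minor are both zero,
// and OID::isSet() is false for the all-zero OID().
bool shardHasNoChunksButCollectionIsSharded(const ChunkVersion& v) {
    return !v.isSet() && v.epoch().isSet();
}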
Example no. 9
    const ChunkVersion ShardingState::getVersion( const string& ns ) const {
        scoped_lock lk(_mutex);

        CollectionMetadataMap::const_iterator it = _collMetadata.find( ns );
        if ( it != _collMetadata.end() ) {
            CollectionMetadataPtr p = it->second;
            return p->getShardVersion();
        }
        else {
            return ChunkVersion( 0, OID() );
        }
    }
Example no. 10
void ChunkManager::createFirstChunks(OperationContext* txn,
                                     const ShardId& primaryShardId,
                                     const vector<BSONObj>* initPoints,
                                     const set<ShardId>* initShardIds) {
    // TODO distlock?
    // TODO: Race condition if we shard the collection and insert data while we split across
    // the non-primary shard.

    vector<BSONObj> splitPoints;
    vector<ShardId> shardIds;
    calcInitSplitsAndShards(txn, primaryShardId, initPoints, initShardIds, &splitPoints, &shardIds);


    // this is the first chunk; start the versioning from scratch
    ChunkVersion version;
    version.incEpoch();
    version.incMajor();

    log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << _ns
          << " using new epoch " << version.epoch();

    for (unsigned i = 0; i <= splitPoints.size(); i++) {
        BSONObj min = i == 0 ? _keyPattern.getKeyPattern().globalMin() : splitPoints[i - 1];
        BSONObj max =
            i < splitPoints.size() ? splitPoints[i] : _keyPattern.getKeyPattern().globalMax();

        Chunk temp(this, min, max, shardIds[i % shardIds.size()], version);

        BSONObjBuilder chunkBuilder;
        temp.serialize(chunkBuilder);

        BSONObj chunkObj = chunkBuilder.obj();

        Status result = grid.catalogManager(txn)->update(txn,
                                                         ChunkType::ConfigNS,
                                                         BSON(ChunkType::name(temp.genID())),
                                                         chunkObj,
                                                         true,
                                                         false,
                                                         NULL);

        version.incMinor();

        if (!result.isOK()) {
            string ss = str::stream()
                << "creating first chunks failed. result: " << result.reason();
            error() << ss;
            msgasserted(15903, ss);
        }
    }

    _version = ChunkVersion(0, 0, version.epoch());
}
Example no. 11
    void ShardingState::donateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ChunkVersion version ) {
        scoped_lock lk( _mutex );

        ChunkManagersMap::const_iterator it = _chunks.find( ns );
        verify( it != _chunks.end() ) ;
        ShardChunkManagerPtr p = it->second;

        // empty shards should have version 0
        version = ( p->getNumChunks() > 1 ) ? version : ChunkVersion( 0 , OID() );

        ShardChunkManagerPtr cloned( p->cloneMinus( min , max , version ) );
        // TODO: a bit dangerous to have two different zero-version states - no-manager and
        // no-version
        _chunks[ns] = cloned;
    }
Example no. 12
Status ChunkManager::createFirstChunks(OperationContext* txn,
                                       const ShardId& primaryShardId,
                                       const vector<BSONObj>* initPoints,
                                       const set<ShardId>* initShardIds) {
    // TODO distlock?
    // TODO: Race condition if we shard the collection and insert data while we split across
    // the non-primary shard.

    vector<BSONObj> splitPoints;
    vector<ShardId> shardIds;
    calcInitSplitsAndShards(txn, primaryShardId, initPoints, initShardIds, &splitPoints, &shardIds);

    // this is the first chunk; start the versioning from scratch
    ChunkVersion version(1, 0, OID::gen());

    log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << _ns
          << " using new epoch " << version.epoch();

    for (unsigned i = 0; i <= splitPoints.size(); i++) {
        BSONObj min = i == 0 ? _keyPattern.getKeyPattern().globalMin() : splitPoints[i - 1];
        BSONObj max =
            i < splitPoints.size() ? splitPoints[i] : _keyPattern.getKeyPattern().globalMax();

        ChunkType chunk;
        chunk.setName(Chunk::genID(_ns, min));
        chunk.setNS(_ns);
        chunk.setMin(min);
        chunk.setMax(max);
        chunk.setShard(shardIds[i % shardIds.size()]);
        chunk.setVersion(version);

        Status status = grid.catalogManager(txn)
                        ->insertConfigDocument(txn, ChunkType::ConfigNS, chunk.toBSON());
        if (!status.isOK()) {
            const string errMsg = str::stream()
                                  << "Creating first chunks failed: " << status.reason();
            error() << errMsg;
            return Status(status.code(), errMsg);
        }

        version.incMinor();
    }

    _version = ChunkVersion(0, 0, version.epoch());

    return Status::OK();
}
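For reference, the loop above hands out strictly increasing minor versions under the freshly generated epoch; a rough sketch of the resulting sequence for three split points (illustrative only, derived from the code above):

// With split points [s1, s2, s3] and generated epoch E, the inserted chunks
// receive the following versions:
//   [MinKey, s1) -> (1, 0, E)
//   [s1, s2)     -> (1, 1, E)
//   [s2, s3)     -> (1, 2, E)
//   [s3, MaxKey) -> (1, 3, E)
// The in-memory _version is then reset to (0, 0, E), the same initial value used
// by the ChunkManager constructor (see Example no. 7), presumably so that a
// subsequent load picks up the authoritative versions from the config server.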
Example no. 13
    Status MetadataLoader::initCollection( const string& ns,
                                           const string& shard,
                                           CollectionMetadata* metadata )
    {
        //
        // Bring collection entry from the config server.
        //

        BSONObj collObj;
        {
            try {
                ScopedDbConnection conn( _configLoc.toString(), 30 );
                collObj = conn->findOne( CollectionType::ConfigNS, QUERY(CollectionType::ns()<<ns));
                conn.done();
            }
            catch ( const DBException& e ) {
                string errMsg = str::stream() << "could not query collection metadata"
                                              << causedBy( e );

                // We deliberately do not return conn to the pool, since it was involved
                // with the error here.

                return Status( ErrorCodes::HostUnreachable, errMsg );
            }
        }

        CollectionType collDoc;
        string errMsg;
        if ( !collDoc.parseBSON( collObj, &errMsg ) || !collDoc.isValid( &errMsg ) ) {
            return Status( ErrorCodes::FailedToParse, errMsg );
        }

        //
        // Load or generate default chunks for collection config.
        //

        if ( collDoc.isKeyPatternSet() && !collDoc.getKeyPattern().isEmpty() ) {

            metadata->_keyPattern = collDoc.getKeyPattern();
            metadata->_shardVersion = ChunkVersion( 0, 0, collDoc.getEpoch() );
            metadata->_collVersion = ChunkVersion( 0, 0, collDoc.getEpoch() );

            return Status::OK();
        }
        else if ( collDoc.isPrimarySet() && collDoc.getPrimary() == shard ) {

            if ( shard == "" ) {
                warning() << "shard not verified, assuming collection " << ns
                          << " is unsharded on this shard" << endl;
            }

            metadata->_keyPattern = BSONObj();
            metadata->_shardVersion = ChunkVersion( 1, 0, collDoc.getEpoch() );
            metadata->_collVersion = metadata->_shardVersion;

            return Status::OK();
        }
        else {
            errMsg = str::stream() << "collection " << ns << " does not have a shard key "
                    << "and primary " << ( collDoc.isPrimarySet() ? collDoc.getPrimary() : "" )
                    << " does not match this shard " << shard;
            return Status( ErrorCodes::RemoteChangeDetected, errMsg );
        }
    }
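The three branches above map directly onto the metadata that gets installed; a condensed summary, derived from the code above (illustrative only):

// initCollection() outcomes, as implemented above:
//   key pattern set and non-empty        -> _shardVersion = _collVersion = (0, 0, epoch);
//                                           chunks are loaded separately (see initChunks below)
//   primary set and equal to this shard  -> empty key pattern,
//                                           _shardVersion = _collVersion = (1, 0, epoch)
//   anything else                        -> Status(ErrorCodes::RemoteChangeDetected, ...)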
Example no. 14
    Status MetadataLoader::initChunks( const string& ns,
                                       const string& shard,
                                       const CollectionMetadata* oldMetadata,
                                       CollectionMetadata* metadata )
    {
        map<string, ChunkVersion> versionMap;
        OID epoch = metadata->getCollVersion().epoch();

        // Check to see if we should use the old version or not.
        if ( oldMetadata ) {

            ChunkVersion oldVersion = oldMetadata->getShardVersion();

            if ( oldVersion.isSet() && oldVersion.hasCompatibleEpoch( epoch ) ) {

                // Our epoch for coll version and shard version should be the same.
                verify( oldMetadata->getCollVersion().hasCompatibleEpoch( epoch ) );

                versionMap[shard] = oldMetadata->_shardVersion;
                metadata->_collVersion = oldMetadata->_collVersion;

                // TODO: This could be made more efficient if copying not required, but
                // not as frequently reloaded as in mongos.
                metadata->_chunksMap = oldMetadata->_chunksMap;

                LOG(2) << "loading new chunks for collection " << ns
                              << " using old metadata w/ version " << oldMetadata->getShardVersion()
                              << " and " << metadata->_chunksMap.size() << " chunks" << endl;
            }
        }

        // Expose the new metadata's range map and version to the "differ," which
        // is ultimately responsible for filling them in.
        SCMConfigDiffTracker differ( shard );
        differ.attach( ns, metadata->_chunksMap, metadata->_collVersion, versionMap );

        try {

            ScopedDbConnection conn( _configLoc.toString(), 30 );

            auto_ptr<DBClientCursor> cursor = conn->query( ChunkType::ConfigNS,
                                                           differ.configDiffQuery() );

            if ( !cursor.get() ) {
                metadata->_collVersion = ChunkVersion();
                metadata->_chunksMap.clear();
                conn.done();
                return Status( ErrorCodes::HostUnreachable,
                               "problem opening chunk metadata cursor" );
            }

            // Diff tracker should *always* find at least one chunk if this shard owns a chunk.
            int diffsApplied = differ.calculateConfigDiff( *cursor );
            if ( diffsApplied > 0 ) {

                LOG(2) << "loaded " << diffsApplied << " chunks into new metadata for " << ns
                           << " with version " << metadata->_collVersion << endl;

                metadata->_shardVersion = versionMap[shard];
                metadata->fillRanges();
                conn.done();
                return Status::OK();
            }
            else if ( diffsApplied == 0 ) {

                warning() << "no chunks found when reloading " << ns << ", previous version was "
                          << metadata->_collVersion.toString() << endl;

                metadata->_collVersion = ChunkVersion( 0, 0, OID() );
                metadata->_chunksMap.clear();
                conn.done();
                return Status::OK();
            }
            else {

                // TODO: make this impossible by making sure we don't migrate / split on this
                // shard during the reload.  No chunks were found for the ns.

                string errMsg = str::stream() << "invalid chunks found when reloading " << ns
                                              << ", previous version was "
                                              << metadata->_collVersion.toString()
                                              << ", this should be rare";

                warning() << errMsg << endl;

                metadata->_collVersion = ChunkVersion( 0, 0, OID() );
                metadata->_chunksMap.clear();
                conn.done();
                return Status( ErrorCodes::RemoteChangeDetected, errMsg );
            }
        }
        catch ( const DBException& e ) {
            string errMsg = str::stream() << "problem querying chunks metadata" << causedBy( e );

            // We deliberately do not return connPtr to the pool, since it was involved
            // with the error here.

            return Status( ErrorCodes::HostUnreachable, errMsg );
        }
    }
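Likewise, initChunks() resolves to one of a few outcomes depending on how the diff against the config server goes; a condensed summary, derived from the code above (illustrative only):

// initChunks() outcomes, as implemented above:
//   cursor could not be opened  -> metadata reset, Status(ErrorCodes::HostUnreachable, ...)
//   diffsApplied > 0            -> _shardVersion = versionMap[shard], ranges filled, Status::OK()
//   diffsApplied == 0           -> _collVersion = (0, 0, OID()), chunk map cleared, Status::OK()
//   diffsApplied < 0 (invalid)  -> _collVersion = (0, 0, OID()), chunk map cleared,
//                                  Status(ErrorCodes::RemoteChangeDetected, ...)
//   DBException from the query  -> Status(ErrorCodes::HostUnreachable, ...)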
Example no. 15
    bool MetadataLoader::initCollection(const string& ns,
                                        const string& shard,
                                        const CollectionManager* oldManager,
                                        CollectionManager* manager,
                                        string* errMsg) {
        //
        // Bring collection entry from the config server.
        //

        BSONObj collObj;
        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(
                    ScopedDbConnection::getInternalScopedDbConnection(_configLoc.toString(), 30));
                ScopedDbConnection& conn = *connPtr;

                collObj = conn->findOne(CollectionType::ConfigNS, QUERY(CollectionType::ns()<<ns));
            }
            catch (const DBException& e) {
                *errMsg = str::stream() << "caught exception accessing the config servers "
                                        << causedBy(e);

                // We deliberately do not return connPtr to the pool, since it was involved
                // with the error here.

                return false;
            }

            connPtr->done();
        }

        CollectionType collDoc;
        if (!collDoc.parseBSON(collObj, errMsg) || !collDoc.isValid(errMsg)) {
            return false;
        }

        //
        // Load or generate default chunks for collection config.
        //

        if (!collDoc.getKeyPattern().isEmpty()) {

            manager->_key = collDoc.getKeyPattern();

            if(!initChunks(collDoc, ns, shard, oldManager, manager, errMsg)){
                return false;
            }
        }
        else if(collDoc.getPrimary() == shard) {

            if (shard == "") {
                warning() << "shard not verified, assuming collection "
                          << ns << " is unsharded on this shard" << endl;
            }

            manager->_key = BSONObj();
            manager->_maxShardVersion = ChunkVersion(1, 0, collDoc.getEpoch());
            manager->_maxCollVersion = manager->_maxShardVersion;
        }
        else {
            *errMsg = str::stream() << "collection " << ns << " does not have a shard key "
                                    << "and primary " << collDoc.getPrimary()
                                    << " does not match this shard " << shard;
            return false;
        }

        return true;
    }
Example no. 16
    Status MetadataLoader::initCollection( const string& ns,
                                           const string& shard,
                                           CollectionMetadata* metadata ) const
    {
        //
        // Bring collection entry from the config server.
        //

        BSONObj collDoc;
        {
            try {
                ScopedDbConnection conn( _configLoc.toString(), 30 );
                collDoc = conn->findOne( CollectionType::ConfigNS, QUERY(CollectionType::ns()<<ns));
                conn.done();
            }
            catch ( const DBException& e ) {
                string errMsg = str::stream() << "could not query collection metadata"
                                              << causedBy( e );

                // We deliberately do not return conn to the pool, since it was involved
                // with the error here.

                return Status( ErrorCodes::HostUnreachable, errMsg );
            }
        }

        string errMsg;
        if ( collDoc.isEmpty() ) {

            errMsg = str::stream() << "could not load metadata, collection " << ns << " not found";
            warning() << errMsg << endl;

            return Status( ErrorCodes::NamespaceNotFound, errMsg );
        }

        CollectionType collInfo;
        if ( !collInfo.parseBSON( collDoc, &errMsg ) || !collInfo.isValid( &errMsg ) ) {

            errMsg = str::stream() << "could not parse metadata for collection " << ns
                                   << causedBy( errMsg );
            warning() << errMsg << endl;

            return Status( ErrorCodes::FailedToParse, errMsg );
        }

        if ( collInfo.isDroppedSet() && collInfo.getDropped() ) {

            errMsg = str::stream() << "could not load metadata, collection " << ns
                                   << " was dropped";
            warning() << errMsg << endl;

            return Status( ErrorCodes::NamespaceNotFound, errMsg );
        }

        if ( collInfo.isKeyPatternSet() && !collInfo.getKeyPattern().isEmpty() ) {

            // Sharded collection, need to load chunks

            metadata->_keyPattern = collInfo.getKeyPattern();
            metadata->_shardVersion = ChunkVersion( 0, 0, collInfo.getEpoch() );
            metadata->_collVersion = ChunkVersion( 0, 0, collInfo.getEpoch() );

            return Status::OK();
        }
        else if ( collInfo.isPrimarySet() && collInfo.getPrimary() == shard ) {

            // A collection with a non-default primary

            // Empty primary field not allowed if set
            dassert( collInfo.getPrimary() != "" );

            metadata->_keyPattern = BSONObj();
            metadata->_shardVersion = ChunkVersion( 1, 0, collInfo.getEpoch() );
            metadata->_collVersion = metadata->_shardVersion;

            return Status::OK();
        }
        else {

            // A collection with a primary that doesn't match this shard or is empty, the primary
            // may have changed before we loaded.

            errMsg = str::stream() << "collection " << ns << " does not have a shard key "
                                   << "and primary "
                                   << ( collInfo.isPrimarySet() ? collInfo.getPrimary() : "" )
                                   << " does not match this shard " << shard;

            warning() << errMsg << endl;

            metadata->_collVersion = ChunkVersion( 0, 0, OID() );

            return Status( ErrorCodes::RemoteChangeDetected, errMsg );
        }
    }
Example no. 17
    bool MetadataLoader::initChunks(const CollectionType& collDoc,
                                    const string& ns,
                                    const string& shard,
                                    const CollectionManager* oldManager,
                                    CollectionManager* manager,
                                    string* errMsg) {

        map<string,ChunkVersion> versionMap;
        manager->_maxCollVersion = ChunkVersion(0, 0, collDoc.getEpoch());

        // Check to see if we should use the old version or not.
        if (oldManager) {

            ChunkVersion oldVersion = oldManager->getMaxShardVersion();

            if (oldVersion.isSet() && oldVersion.hasCompatibleEpoch(collDoc.getEpoch())) {

                // Our epoch for coll version and shard version should be the same.
                verify(oldManager->getMaxCollVersion().hasCompatibleEpoch(collDoc.getEpoch()));

                versionMap[shard] = oldManager->_maxShardVersion;
                manager->_maxCollVersion = oldManager->_maxCollVersion;

                // TODO: This could be made more efficient if copying not required, but
                // not as frequently reloaded as in mongos.
                manager->_chunksMap = oldManager->_chunksMap;

                LOG(2) << "loading new chunks for collection " << ns
                       << " using old chunk manager w/ version "
                       << oldManager->getMaxShardVersion()
                       << " and " << manager->_chunksMap.size() << " chunks" << endl;
            }
        }

        // Expose the new manager's range map and version to the "differ," which
        // is ultimately responsible for filling them in.
        SCMConfigDiffTracker differ(shard);
        differ.attach(ns, manager->_chunksMap, manager->_maxCollVersion, versionMap);

        try {

            scoped_ptr<ScopedDbConnection> connPtr(
                ScopedDbConnection::getInternalScopedDbConnection(_configLoc.toString(), 30));
            ScopedDbConnection& conn = *connPtr;

            auto_ptr<DBClientCursor> cursor = conn->query(ChunkType::ConfigNS,
                                                          differ.configDiffQuery());

            if (!cursor.get()) {
                // Could not open a cursor to the chunks collection; treat this as a
                // connectivity problem and reset the manager state.
                *errMsg = str::stream() << "problem opening chunk metadata cursor for " << ns;
                manager->_maxCollVersion = ChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }

            // Diff tracker should *always* find at least one chunk if collection exists.
            int diffsApplied = differ.calculateConfigDiff(*cursor);
            if (diffsApplied > 0) {

                LOG(2) << "loaded " << diffsApplied
                       << " chunks into new chunk manager for " << ns
                       << " with version " << manager->_maxCollVersion << endl;

                manager->_maxShardVersion = versionMap[shard];
                manager->fillRanges();
                connPtr->done();
                return true;
            }
            else if(diffsApplied == 0) {

                *errMsg = str::stream() << "no chunks found when reloading " << ns
                                        << ", previous version was "
                                        << manager->_maxCollVersion.toString();

                warning() << *errMsg << endl;

                manager->_maxCollVersion = ChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }
            else{

                // TODO: make this impossible by making sure we don't migrate / split on this
                // shard during the reload.  No chunks were found for the ns.

                *errMsg = str::stream() << "invalid chunks found when reloading " << ns
                                        << ", previous version was "
                                        << manager->_maxCollVersion.toString()
                                        << ", this should be rare";

                warning() << *errMsg << endl;

                manager->_maxCollVersion = ChunkVersion();
                manager->_chunksMap.clear();
                connPtr->done();
                return false;
            }
        }
        catch (const DBException& e) {
            *errMsg = str::stream() << "caught exception accessing the config servers"
                                    << causedBy(e);

            // We deliberately do not return connPtr to the pool, since it was involved
            // with the error here.

            return false;
        }
    }
Example no. 18
        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) ) 
                return false;

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }
            

            // Handle initial shard connection
            if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){
                result.append( "initialized", true );
                return true;
            }

            // we can run on a slave up to here
            if ( ! isMaster( "admin" ) ) {
                result.append( "errmsg" , "not master" );
                result.append( "note" , "from post init in setShardVersion" );
                return false;
            }

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify namespace";
                return false;
            }

            if( ! ConfigVersion::canParseBSON( cmdObj, "version" ) ){
                errmsg = "need to specify version";
                return false;
            }

            const ConfigVersion version = ConfigVersion::fromBSON( cmdObj, "version" );
            
            // step 3

            const ConfigVersion oldVersion = info->getVersion(ns);
            const ConfigVersion globalVersion = shardingState.getVersion(ns);

            oldVersion.addToBSON( result, "oldVersion" );
            
            if ( globalVersion.isSet() && version.isSet() ) {
                // this means there is no reset in progress on either side,
                // so it's safe to make some assumptions

                if ( version.isWriteCompatibleWith( globalVersion ) ) {
                    // mongos and mongod agree!
                    if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                        if ( oldVersion < globalVersion &&
                             oldVersion.hasCompatibleEpoch(globalVersion) )
                        {
                            info->setVersion( ns , version );
                        }
                        else if ( authoritative ) {
                            // this means there was a drop and our version is reset
                            info->setVersion( ns , version );
                        }
                        else {
                            result.append( "ns" , ns );
                            result.appendBool( "need_authoritative" , true );
                            errmsg = "verifying drop on '" + ns + "'";
                            return false;
                        }
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            Lock::GlobalWrite setShardVersionLock; // TODO: can we get rid of this??
            
            if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
                // this had been reset
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
            }

            if ( ! version.isSet() && ! globalVersion.isSet() ) {
                // this connection is cleaning itself
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            if ( ! version.isSet() && globalVersion.isSet() ) {
                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    globalVersion.addToBSON( result, "globalVersion" );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }
                log() << "wiping data for: " << ns << endl;
                globalVersion.addToBSON( result, "beforeDrop" );
                // only setting global version on purpose
                // need clients to re-find meta-data
                shardingState.resetVersion( ns );
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            // TODO: Refactor all of this
            if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
                errmsg = "this connection already had a newer version of collection '" + ns + "'";
                result.append( "ns" , ns );
                version.addToBSON( result, "newVersion" );
                globalVersion.addToBSON( result, "globalVersion" );
                return false;
            }

            // TODO: Refactor all of this
            if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
                while ( shardingState.inCriticalMigrateSection() ) {
                    log() << "waiting till out of critical section" << endl;
                    dbtemprelease r;
                    shardingState.waitTillNotInCriticalSection( 10 );
                }
                errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                globalVersion.addToBSON( result, "globalVersion" );
                result.appendBool( "reloadConfig" , true );
                return false;
            }

            if ( ! globalVersion.isSet() && ! authoritative ) {
                // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
                // should require a reload.
                while ( shardingState.inCriticalMigrateSection() ) {
                    log() << "waiting till out of critical section" << endl;
                    dbtemprelease r;
                    shardingState.waitTillNotInCriticalSection( 10 );
                }

                // need authoritative for first look
                result.append( "ns" , ns );
                result.appendBool( "need_authoritative" , true );
                errmsg = "first time for collection '" + ns + "'";
                return false;
            }

            Timer relockTime;
            {
                dbtemprelease unlock;

                ChunkVersion currVersion = version;
                if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                    errmsg = str::stream() << "client version differs from config's for collection '" << ns << "'";
                    result.append( "ns" , ns );

                    // If this was a reset of a collection, inform mongos to do a full reload
                    if( ! currVersion.isSet()  ){
                        ConfigVersion( 0, OID() ).addToBSON( result, "version" );
                        result.appendBool( "reloadConfig", true );
                    }
                    else{
                        version.addToBSON( result, "version" );
                    }

                    globalVersion.addToBSON( result, "globalVersion" );
                    return false;
                }
            }
            if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) {
                log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl;
            }
            
            info->setVersion( ns , version );
            return true;
        }
Example no. 19
/**
 * Updates the remote cached version on the remote shard host (primary, in the case of replica
 * sets) if needed with a fully-qualified shard version for the given namespace:
 *   config server(s) + shard name + shard version
 *
 * If no remote cached version has ever been set, an initial shard version is sent.
 *
 * If the namespace is empty and no version has ever been sent, the config server + shard name
 * is sent to the remote shard host to initialize the connection as coming from mongos.
 * NOTE: This initialization is *best-effort only*.  Operations which wish to correctly version
 * must send the namespace.
 *
 * Config servers are special and are not (unless otherwise a shard) kept up to date with this
 * protocol.  This is safe so long as config servers only contain unversioned collections.
 *
 * It is an error to call checkShardVersion with an unversionable connection (isVersionableCB).
 *
 * @return true if we contacted the remote host
 */
bool checkShardVersion(DBClientBase* conn_in,
                       const string& ns,
                       ChunkManagerPtr refManager,
                       bool authoritative,
                       int tryNumber) {
    // TODO: cache, optimize, etc...

    // Empty namespaces are special - we require initialization but not versioning
    if (ns.size() == 0) {
        return initShardVersionEmptyNS(conn_in);
    }

    auto status = grid.catalogCache()->getDatabase(nsToDatabase(ns));
    if (!status.isOK()) {
        return false;
    }

    shared_ptr<DBConfig> conf = status.getValue();

    DBClientBase* conn = getVersionable(conn_in);
    verify(conn);  // errors thrown above

    unsigned long long officialSequenceNumber = 0;

    ShardPtr primary;
    ChunkManagerPtr manager;
    if (authoritative)
        conf->getChunkManagerIfExists(ns, true);

    conf->getChunkManagerOrPrimary(ns, manager, primary);

    if (manager) {
        officialSequenceNumber = manager->getSequenceNumber();
    }

    const auto shard = grid.shardRegistry()->getShard(conn->getServerAddress());
    uassert(ErrorCodes::ShardNotFound,
            str::stream() << conn->getServerAddress() << " is not recognized as a shard",
            shard);

    // Check this manager against the reference manager
    if (manager) {
        if (refManager && !refManager->compatibleWith(*manager, shard->getId())) {
            const ChunkVersion refVersion(refManager->getVersion(shard->getId()));
            const ChunkVersion currentVersion(manager->getVersion(shard->getId()));

            string msg(str::stream()
                       << "manager (" << currentVersion.toString() << " : "
                       << manager->getSequenceNumber() << ") "
                       << "not compatible with reference manager (" << refVersion.toString()
                       << " : " << refManager->getSequenceNumber() << ") "
                       << "on shard " << shard->getId() << " (" << shard->getConnString().toString()
                       << ")");

            throw SendStaleConfigException(ns, msg, refVersion, currentVersion);
        }
    } else if (refManager) {
        string msg(str::stream() << "not sharded ("
                                 << ((manager.get() == 0) ? string("<none>") : str::stream()
                                             << manager->getSequenceNumber())
                                 << ") but has reference manager ("
                                 << refManager->getSequenceNumber() << ") "
                                 << "on conn " << conn->getServerAddress() << " ("
                                 << conn_in->getServerAddress() << ")");

        throw SendStaleConfigException(
            ns, msg, refManager->getVersion(shard->getId()), ChunkVersion::UNSHARDED());
    }

    // Do not send setShardVersion to collections on the config servers - this causes problems
    // when config servers are also shards and get SSV with conflicting names.
    // TODO: Make config servers regular shards
    if (primary && primary->getId() == "config") {
        return false;
    }

    // Has the ChunkManager been reloaded since the last time we updated the shard version over
    // this connection?  If we've never updated the shard version, do so now.
    unsigned long long sequenceNumber = 0;
    if (connectionShardStatus.getSequence(conn, ns, &sequenceNumber)) {
        if (sequenceNumber == officialSequenceNumber) {
            return false;
        }
    }

    ChunkVersion version = ChunkVersion(0, 0, OID());
    if (manager) {
        version = manager->getVersion(shard->getId());
    }

    LOG(1) << "setting shard version of " << version << " for " << ns << " on shard "
           << shard->toString();

    LOG(3) << "last version sent with chunk manager iteration " << sequenceNumber
           << ", current chunk manager iteration is " << officialSequenceNumber;

    BSONObj result;
    if (setShardVersion(*conn,
                        ns,
                        grid.catalogManager()->connectionString().toString(),
                        version,
                        manager.get(),
                        authoritative,
                        result)) {
        LOG(1) << "      setShardVersion success: " << result;
        connectionShardStatus.setSequence(conn, ns, officialSequenceNumber);
        return true;
    }

    LOG(1) << "       setShardVersion failed!\n" << result << endl;

    if (result["need_authoritative"].trueValue())
        massert(10428, "need_authoritative set but in authoritative mode already", !authoritative);

    if (!authoritative) {
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, 1, tryNumber + 1);
        return true;
    }

    if (result["reloadConfig"].trueValue()) {
        if (result["version"].timestampTime() == Date_t()) {
            warning() << "reloading full configuration for " << conf->name()
                      << ", connection state indicates significant version changes";

            // reload db
            conf->reload();
        } else {
            // reload config
            conf->getChunkManager(ns, true);
        }
    }

    const int maxNumTries = 7;
    if (tryNumber < maxNumTries) {
        LOG(tryNumber < (maxNumTries / 2) ? 1 : 0)
            << "going to retry checkShardVersion shard: " << shard->toString() << " " << result;
        sleepmillis(10 * tryNumber);
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
        return true;
    }

    string errmsg = str::stream() << "setShardVersion failed shard: " << shard->toString() << " "
                                  << result;
    log() << "     " << errmsg << endl;
    massert(10429, errmsg, 0);
    return true;
}
template <class ValType, class ShardType>
Query ConfigDiffTracker<ValType, ShardType>::
configDiffQuery( const set<ChunkVersion>& extraMinorVersions ) const
{
    verifyAttached();

    //
    // Basic idea behind the query is to find all the chunks $gt the current max version, and
    // then also update chunks that we need minor versions - splits and (2.0) max chunks on
    // shards
    //

    static const int maxMinorVersionClauses = 50;
    BSONObjBuilder queryB;

    int numStaleMinorClauses = extraMinorVersions.size() + _maxShardVersions->size();

#ifdef _DEBUG
    // In debug builds, randomly trigger full reloads to exercise both codepaths
    if( rand() % 2 ) numStaleMinorClauses = maxMinorVersionClauses;
#endif

    queryB.append(ChunkType::ns(), _ns);

    //
    // If we have only a few minor versions to refresh, we can be more selective in our query
    //
    if( numStaleMinorClauses < maxMinorVersionClauses ) {

        //
        // Get any version changes higher than we know currently
        //
        BSONArrayBuilder queryOrB( queryB.subarrayStart( "$or" ) );
        {
            BSONObjBuilder queryNewB( queryOrB.subobjStart() );
            {
                BSONObjBuilder ts(queryNewB.subobjStart(ChunkType::DEPRECATED_lastmod()));
                // We should *always* pull at least a single chunk back; this lets us quickly
                // detect if our collection was unsharded (and, most of the time, if it was
                // resharded) in the meantime.
                ts.appendTimestamp( "$gte", _maxVersion->toLong() );
                ts.done();
            }

            queryNewB.done();
        }

        // Get any shard version changes higher than we know currently
        // Needed since there could have been a split of the max version chunk of any shard
        // TODO: Ideally, we shouldn't care about these
        for( typename map<ShardType, ChunkVersion>::const_iterator it = _maxShardVersions->begin(); it != _maxShardVersions->end(); it++ ) {

            BSONObjBuilder queryShardB( queryOrB.subobjStart() );
            queryShardB.append(ChunkType::shard(), nameFrom( it->first ) );
            {
                BSONObjBuilder ts(queryShardB.subobjStart(ChunkType::DEPRECATED_lastmod()));
                ts.appendTimestamp( "$gt", it->second.toLong() );
                ts.done();
            }
            queryShardB.done();
        }

        // Get any minor version changes we've marked as interesting
        // TODO: Ideally we shouldn't care about these
        for( set<ChunkVersion>::const_iterator it = extraMinorVersions.begin(); it != extraMinorVersions.end(); it++ ) {

            BSONObjBuilder queryShardB( queryOrB.subobjStart() );
            {
                BSONObjBuilder ts(queryShardB.subobjStart(ChunkType::DEPRECATED_lastmod()));
                ts.appendTimestamp( "$gt", it->toLong() );
                ts.appendTimestamp( "$lt",
                                    ChunkVersion( it->majorVersion() + 1, 0, OID() ).toLong() );
                ts.done();
            }
            queryShardB.done();
        }

        queryOrB.done();
    }

    BSONObj query = queryB.obj();

    LOG(2) << "major version query from " << *_maxVersion << " and over "
           << _maxShardVersions->size() << " shards is " << query << endl;

    //
    // NOTE: IT IS IMPORTANT FOR CONSISTENCY THAT WE SORT BY ASC VERSION, TO HANDLE
    // CURSOR YIELDING BETWEEN CHUNKS BEING MIGRATED.
    //
    // This ensures that changes to chunk version (which will always be higher) will always
    // come *after* our current position in the chunk cursor.
    //

    Query queryObj(query);
    queryObj.sort(BSON( "lastmod" << 1 ));

    // Return the sorted query object; returning a fresh Query(query) here would
    // silently drop the ascending-lastmod sort required by the NOTE above.
    return queryObj;
}
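For orientation, the BSON assembled by the builders above has roughly the following shape when the selective path is taken (an illustrative sketch; the <...> placeholders are not literal values, and "lastmod" stands for ChunkType::DEPRECATED_lastmod()):

// Illustrative shape of the diff query built above:
//   {
//     ns: <collection namespace>,
//     $or: [
//       { lastmod: { $gte: <current max collection version> } },
//       { shard: <shard name>, lastmod: { $gt: <that shard's max version> } },   // one per tracked shard
//       { lastmod: { $gt: <extra minor version>, $lt: <next major version> } }   // one per extra minor version
//     ]
//   }
// sorted ascending by "lastmod", as required by the NOTE above.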
    void ShardChunkManager::_init( const string& configServer , const string& ns , const string& shardName, ShardChunkManagerPtr oldManager ) {

        // have to get a connection to the config db
        // special case if I'm the configdb since I'm locked and if I connect to myself
        // its a deadlock
        scoped_ptr<ScopedDbConnection> scoped;
        scoped_ptr<DBDirectClient> direct;
        DBClientBase * conn;
        if ( configServer.empty() ) {
            direct.reset( new DBDirectClient() );
            conn = direct.get();
        }
        else {
            scoped.reset( ScopedDbConnection::getInternalScopedDbConnection( configServer, 30.0 ) );
            conn = scoped->get();
        }

        // get this collection's sharding key
        BSONObj collectionDoc = conn->findOne(CollectionType::ConfigNS, BSON(CollectionType::ns(ns)));

        if( collectionDoc.isEmpty() ){
            warning() << ns << " does not exist as a sharded collection" << endl;
            return;
        }

        if( collectionDoc[CollectionType::dropped()].Bool() ){
            warning() << ns << " was dropped.  Re-shard collection first." << endl;
            return;
        }

        _fillCollectionKey( collectionDoc );

        map<string,ChunkVersion> versionMap;
        versionMap[ shardName ] = _version;
        _collVersion = ChunkVersion( 0, OID() );

        // Check to see if we have an old ShardChunkManager to use
        if( oldManager && oldManager->_collVersion.isSet() ){

            versionMap[ shardName ] = oldManager->_version;
            _collVersion = oldManager->_collVersion;
            // TODO: This could be made more efficient if copying not required, but not as
            // frequently reloaded as in mongos.
            _chunksMap = oldManager->_chunksMap;

            LOG(2) << "loading new chunks for collection " << ns << " using old chunk manager w/ version " << _collVersion
                   << " and " << _chunksMap.size() << " chunks" << endl;
        }

        // Attach our config diff tracker to our range map and versions
        SCMConfigDiffTracker differ( shardName );
        differ.attach( ns, _chunksMap, _collVersion, versionMap );

        // Need to do the query ourselves, since we may use direct conns to the db
        Query query = differ.configDiffQuery();
        auto_ptr<DBClientCursor> cursor = conn->query(ChunkType::ConfigNS, query);

        uassert( 16181, str::stream() << "could not initialize cursor to config server chunks collection for ns " << ns, cursor.get() );

        // Diff tracker should *always* find at least one chunk if collection exists
        int diffsApplied = differ.calculateConfigDiff( *cursor );
        if( diffsApplied > 0 ){

            LOG(2) << "loaded " << diffsApplied << " chunks into new chunk manager for " << ns
                   << " with version " << _collVersion << endl;

            // Save the new version of this shard
            _version = versionMap[ shardName ];
            _fillRanges();

        }
        else if( diffsApplied == 0 ){

            // No chunks were found for the ns
            warning() << "no chunks found when reloading " << ns << ", previous version was " << _collVersion << endl;

            _version = ChunkVersion( 0, OID() );
            _collVersion = ChunkVersion( 0, OID() );
            _chunksMap.clear();
        }
        else{

            // TODO: make this impossible by making sure we don't migrate / split on this shard during the
            // reload
            // No chunks were found for the ns
            warning() << "invalid chunks found when reloading " << ns << ", previous version was " << _collVersion
                      << ", this should be rare" << endl;

            // Handle the same way as a connectivity error, for now
            // TODO: handle inline
            uassert( 16229,
                     str::stream() << "could not initialize cursor to config server chunks collection for ns "
                                   << ns, cursor.get() );
        }

        if ( scoped.get() )
            scoped->done();

        if ( _chunksMap.empty() )
            log() << "no chunk for collection " << ns << " on shard " << shardName << endl;
    }
    /**
     * @return true if had to do something
     */
    bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager, bool authoritative , int tryNumber ) {
        // TODO: cache, optimize, etc...

        WriteBackListener::init( *conn_in );

        DBConfigPtr conf = grid.getDBConfig( ns );
        if ( ! conf )
            return false;

        DBClientBase* conn = getVersionable( conn_in );
        verify(conn); // errors thrown above

        unsigned long long officialSequenceNumber = 0;

        ChunkManagerPtr manager;
        const bool isSharded = conf->isSharded( ns );
        if ( isSharded ) {
            manager = conf->getChunkManagerIfExists( ns , authoritative );
            // It's possible the chunk manager was reset since we checked whether sharded was true,
            // so must check this here.
            if( manager ) officialSequenceNumber = manager->getSequenceNumber();
        }

        // Check this manager against the reference manager
        if( isSharded && manager ){

            Shard shard = Shard::make( conn->getServerAddress() );
            if( refManager && ! refManager->compatibleWith( manager, shard ) ){
                throw SendStaleConfigException( ns, str::stream() << "manager (" << manager->getVersion( shard ).toString()  << " : " << manager->getSequenceNumber() << ") "
                                                                      << "not compatible with reference manager (" << refManager->getVersion( shard ).toString()  << " : " << refManager->getSequenceNumber() << ") "
                                                                      << "on shard " << shard.getName() << " (" << shard.getAddress().toString() << ")",
                                                refManager->getVersion( shard ), manager->getVersion( shard ) );
            }
        }
        else if( refManager ){
            Shard shard = Shard::make( conn->getServerAddress() );
            string msg( str::stream() << "not sharded ("
                        << ( (manager.get() == 0) ? string( "<none>" ) :
                                str::stream() << manager->getSequenceNumber() )
                        << ") but has reference manager ("
                        << refManager->getSequenceNumber() << ") "
                        << "on conn " << conn->getServerAddress() << " ("
                        << conn_in->getServerAddress() << ")" );

            throw SendStaleConfigException( ns, msg,
                    refManager->getVersion( shard ), ChunkVersion( 0, OID() ));
        }

        // Has the ChunkManager been reloaded since the last time we updated the connection-level
        // version? (i.e., since we last issued the setShardVersion below)
        unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
        if ( sequenceNumber == officialSequenceNumber ) {
            return false;
        }


        ChunkVersion version = ChunkVersion( 0, OID() );
        if ( isSharded && manager ) {
            version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
        }

        if( ! version.isSet() ){
            LOG(0) << "resetting shard version of " << ns << " on " << conn->getServerAddress() << ", " <<
                      ( ! isSharded ? "no longer sharded" :
                      ( ! manager ? "no chunk manager found" :
                                    "version is zero" ) ) << endl;
        }

        LOG(2) << " have to set shard version for conn: " << conn->getServerAddress() << " ns:" << ns
               << " my last seq: " << sequenceNumber << "  current: " << officialSequenceNumber
               << " version: " << version << " manager: " << manager.get()
               << endl;

        const string versionableServerAddress(conn->getServerAddress());

        BSONObj result;
        if ( setShardVersion( *conn , ns , version , manager , authoritative , result ) ) {
            // success!
            LOG(1) << "      setShardVersion success: " << result << endl;
            connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
            return true;
        }

        LOG(1) << "       setShardVersion failed!\n" << result << endl;

        if ( result["need_authoritative"].trueValue() )
            massert( 10428 ,  "need_authoritative set but in authoritative mode already" , ! authoritative );

        if ( ! authoritative ) {
            // use the original connection and get a fresh versionable connection
            // since conn can be invalidated (or worse, freed) after the failure
            checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
            return true;
        }
        
        if ( result["reloadConfig"].trueValue() ) {
            if( result["version"].timestampTime() == 0 ){

                warning() << "reloading full configuration for " << conf->getName()
                          << ", connection state indicates significant version changes" << endl;

                // reload db
                conf->reload();
            }
            else {
                // reload config
                conf->getChunkManager( ns , true );
            }
        }

        const int maxNumTries = 7;
        if ( tryNumber < maxNumTries ) {
            LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 ) 
                << "going to retry checkShardVersion host: " << versionableServerAddress << " " << result << endl;
            sleepmillis( 10 * tryNumber );
            // use the original connection and get a fresh versionable connection
            // since conn can be invalidated (or worse, freed) after the failure
            checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
            return true;
        }
        
        string errmsg = str::stream() << "setShardVersion failed host: " << versionableServerAddress << " " << result;
        log() << "     " << errmsg << endl;
        massert( 10429 , errmsg , 0 );
        return true;
    }
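// A hedged, standalone sketch (illustrative only, not from the source above): checkShardVersion
// stays cheap on the hot path because of the early return when the connection's cached sequence
// number equals the current ChunkManager sequence number. The class name below and the use of a
// raw pointer as the connection key are assumptions made for illustration, not the real
// connectionShardStatus type.
#include <map>
#include <string>
#include <utility>

class ConnectionSequenceCache {
public:
    // Returns 0 if this connection has never been versioned for 'ns'.
    unsigned long long getSequence(const void* conn, const std::string& ns) const {
        std::map<std::pair<const void*, std::string>, unsigned long long>::const_iterator it =
            _seq.find(std::make_pair(conn, ns));
        return it == _seq.end() ? 0ULL : it->second;
    }

    // Called after a successful setShardVersion so an unchanged manager is not resent.
    void setSequence(const void* conn, const std::string& ns, unsigned long long seq) {
        _seq[std::make_pair(conn, ns)] = seq;
    }

private:
    std::map<std::pair<const void*, std::string>, unsigned long long> _seq;
};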
Esempio n. 23
0
/**
 * Special internal logic to run reduced version handshake for empty namespace operations to
 * shards.
 *
 * Eventually this should go completely away, but for now many commands rely on unversioned but
 * mongos-specific behavior on mongod (auditing and replication information in commands)
 */
static bool initShardVersionEmptyNS(DBClientBase* conn_in) {
    bool ok;
    BSONObj result;
    DBClientBase* conn = NULL;
    try {
        // May throw if replica set primary is down
        conn = getVersionable(conn_in);
        dassert(conn);  // errors thrown above

        // Check to see if we've already initialized this connection
        if (connectionShardStatus.hasAnySequenceSet(conn))
            return false;

        // Check to see if this is actually a shard and not a single config server
        // NOTE: Config servers are registered only by the name "config" in the shard cache, not
        // by host, so lookup by host will fail unless the host is also a shard.
        const auto shard = grid.shardRegistry()->getShard(conn->getServerAddress());
        if (!shard) {
            return false;
        }

        LOG(1) << "initializing shard connection to " << shard->toString() << endl;

        ok = setShardVersion(*conn,
                             "",
                             grid.catalogManager()->connectionString().toString(),
                             ChunkVersion(),
                             NULL,
                             true,
                             result);
    } catch (const DBException&) {
        // NOTE: Replica sets may fail to initShardVersion because future calls relying on
        // correct versioning must later call checkShardVersion on the primary.
        // Secondary queries and commands may not call checkShardVersion, but secondary ops
        // aren't versioned at all.
        if (conn_in->type() != ConnectionString::SET) {
            throw;
        }

        // NOTE: Only old-style cluster operations will talk via DBClientReplicaSets - using
        // checkShardVersion is required (which includes initShardVersion information) if these
        // connections are used.

        OCCASIONALLY {
            warning() << "failed to initialize new replica set connection version, "
                      << "will initialize on first use" << endl;
        }

        return false;
    }

    // Record the connection wire version if sent in the response, initShardVersion is a
    // handshake for mongos->mongod connections.
    if (!result["minWireVersion"].eoo()) {
        int minWireVersion = result["minWireVersion"].numberInt();
        int maxWireVersion = result["maxWireVersion"].numberInt();
        conn->setWireVersions(minWireVersion, maxWireVersion);
    }

    LOG(3) << "initial sharding result : " << result << endl;

    connectionShardStatus.setSequence(conn, "", 0);
    return ok;
}
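// A hedged, standalone sketch of the wire-version bookkeeping done at the end of
// initShardVersionEmptyNS: if the handshake reply carries minWireVersion/maxWireVersion, remember
// them on the connection so later requests can pick features both sides support. The field names
// follow the snippet above; Reply and Connection are illustrative stand-ins, not the real
// BSONObj / DBClientBase API.
#include <map>
#include <string>

struct Reply {
    std::map<std::string, int> intFields;  // numeric fields parsed out of the reply
    bool has(const std::string& name) const { return intFields.count(name) != 0; }
    int getInt(const std::string& name) const { return intFields.at(name); }
};

struct Connection {
    int minWireVersion;
    int maxWireVersion;
    Connection() : minWireVersion(0), maxWireVersion(0) {}
    void setWireVersions(int minV, int maxV) {
        minWireVersion = minV;
        maxWireVersion = maxV;
    }
};

inline void recordWireVersions(const Reply& reply, Connection& conn) {
    if (reply.has("minWireVersion") && reply.has("maxWireVersion")) {
        conn.setWireVersions(reply.getInt("minWireVersion"), reply.getInt("maxWireVersion"));
    }
}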
Esempio n. 24
0
    Status MetadataLoader::initChunks( const string& ns,
                                       const string& shard,
                                       const CollectionMetadata* oldMetadata,
                                       CollectionMetadata* metadata ) const
    {
        map<string, ChunkVersion> versionMap;

        // Preserve the epoch
        versionMap[shard] = metadata->_shardVersion;
        OID epoch = metadata->getCollVersion().epoch();
        bool fullReload = true;

        // Check to see if we should use the old version or not.
        if ( oldMetadata ) {

            // If our epochs are compatible, it's useful to use the old metadata for diffs
            if ( oldMetadata->getCollVersion().hasCompatibleEpoch( epoch ) ) {

                fullReload = false;
                dassert( oldMetadata->isValid() );

                versionMap[shard] = oldMetadata->_shardVersion;
                metadata->_collVersion = oldMetadata->_collVersion;

                // TODO: This could be made more efficient if copying were not required,
                // but metadata is not reloaded here as frequently as in mongos.
                metadata->_chunksMap = oldMetadata->_chunksMap;

                LOG( 2 ) << "loading new chunks for collection " << ns
                         << " using old metadata w/ version " << oldMetadata->getShardVersion()
                         << " and " << metadata->_chunksMap.size() << " chunks" << endl;
            }
            else {
                warning() << "reloading collection metadata for " << ns << " with new epoch "
                          << epoch.toString() << ", the current epoch is "
                          << oldMetadata->getCollVersion().epoch().toString() << endl;
            }
        }


        // Exposes the new metadata's range map and version to the "differ," which
        // is ultimately responsible for filling them in.
        SCMConfigDiffTracker differ( shard );
        differ.attach( ns, metadata->_chunksMap, metadata->_collVersion, versionMap );

        try {

            ScopedDbConnection conn( _configLoc.toString(), 30 );

            auto_ptr<DBClientCursor> cursor = conn->query( ChunkType::ConfigNS,
                                                           differ.configDiffQuery() );

            if ( !cursor.get() ) {

                // Make our metadata invalid
                metadata->_collVersion = ChunkVersion( 0, 0, OID() );
                metadata->_chunksMap.clear();
                conn.done();

                return Status( ErrorCodes::HostUnreachable,
                               "problem opening chunk metadata cursor" );
            }

            //
            // The diff tracker should always find at least one chunk (the highest chunk we saw
            // last time).  If not, something has changed on the config server (potentially between
            // when we read the collection data and when we read the chunks data).
            //

            int diffsApplied = differ.calculateConfigDiff( *cursor );
            if ( diffsApplied > 0 ) {

                // Chunks found, return ok

                LOG(2) << "loaded " << diffsApplied << " chunks into new metadata for " << ns
                           << " with version " << metadata->_collVersion << endl;

                metadata->_shardVersion = versionMap[shard];
                metadata->fillRanges();
                conn.done();

                dassert( metadata->isValid() );
                return Status::OK();
            }
            else if ( diffsApplied == 0 ) {

                // No chunks found, the collection is dropping or we're confused
                // If this is a full reload, assume it is a drop for backwards compatibility
                // TODO: drop the config.collections entry *before* the chunks and eliminate this
                // ambiguity

                string errMsg =
                    str::stream() << "no chunks found when reloading " << ns
                                  << ", previous version was "
                                  << metadata->_collVersion.toString()
                                  << ( fullReload ? ", this is a drop" : "" );

                warning() << errMsg << endl;

                metadata->_collVersion = ChunkVersion( 0, 0, OID() );
                metadata->_chunksMap.clear();
                conn.done();

                return fullReload ? Status( ErrorCodes::NamespaceNotFound, errMsg ) :
                                    Status( ErrorCodes::RemoteChangeDetected, errMsg );
            }
            else {

                // Invalid chunks found, our epoch may have changed because we dropped/recreated
                // the collection.

                string errMsg = // br
                        str::stream() << "invalid chunks found when reloading " << ns
                                      << ", previous version was "
                                      << metadata->_collVersion.toString()
                                      << ", this should be rare";

                warning() << errMsg << endl;

                metadata->_collVersion = ChunkVersion( 0, 0, OID() );
                metadata->_chunksMap.clear();
                conn.done();

                return Status( ErrorCodes::RemoteChangeDetected, errMsg );
            }
        }
        catch ( const DBException& e ) {
            string errMsg = str::stream() << "problem querying chunks metadata" << causedBy( e );

            // We deliberately do not return connPtr to the pool, since it was involved
            // with the error here.

            return Status( ErrorCodes::HostUnreachable, errMsg );
        }
    }
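// A hedged, standalone sketch of the decision made at the top of initChunks: old metadata can
// seed an incremental (diff-based) reload only when its collection epoch is compatible with the
// epoch being reloaded; otherwise a full reload is needed. The types below are illustrative
// stand-ins (epoch reduced to a string, ignoring the unset-epoch special case), not the real
// CollectionMetadata / OID API.
#include <string>

struct OldMetadataView {
    std::string collEpoch;   // epoch of the previously loaded collection version
    bool valid;              // result of the isValid()-style sanity check
};

// Returns true when we can diff against the old chunk map instead of doing a full reload.
inline bool canReloadIncrementally(const OldMetadataView* oldMetadata, const std::string& newEpoch) {
    return oldMetadata != 0 && oldMetadata->valid && oldMetadata->collEpoch == newEpoch;
}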
Esempio n. 25
0
template <class ValType, class ShardType>
int ConfigDiffTracker<ValType,ShardType>::
calculateConfigDiff( DBClientCursorInterface& diffCursor )
{
    verifyAttached();

    // Apply the chunk changes to the ranges and versions

    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //    per-collection, and remove any conflicting chunks from the ranges
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on the
    //    shard for mongod) add them to the ranges
    //

    vector<BSONObj> newTracked;
    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;
    while( diffCursor.more() ) {

        BSONObj diffChunkDoc = diffCursor.next();

        ChunkVersion chunkVersion = ChunkVersion::fromBSON(diffChunkDoc, ChunkType::DEPRECATED_lastmod());

        if( diffChunkDoc[ChunkType::min()].type() != Object ||
                diffChunkDoc[ChunkType::max()].type() != Object ||
                diffChunkDoc[ChunkType::shard()].type() != String )
        {
            warning() << "got invalid chunk document " << diffChunkDoc
                      << " when trying to load differing chunks" << endl;
            continue;
        }

        if( ! chunkVersion.isSet() || ! chunkVersion.hasCompatibleEpoch( currEpoch ) ) {

            warning() << "got invalid chunk version " << chunkVersion << " in document " << diffChunkDoc
                      << " when trying to load differing chunks at version "
                      << ChunkVersion( _maxVersion->toLong(), currEpoch ) << endl;

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if( chunkVersion > *_maxVersion ) *_maxVersion = chunkVersion;

        // Chunk version changes
        ShardType shard = shardFor( diffChunkDoc[ChunkType::shard()].String() );
        typename map<ShardType, ChunkVersion>::iterator shardVersionIt = _maxShardVersions->find( shard );
        if( shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion ) {
            (*_maxShardVersions)[ shard ] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking b/c of this chunk's changes
        removeOverlapping(diffChunkDoc[ChunkType::min()].Obj(),
                          diffChunkDoc[ChunkType::max()].Obj());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore or unbuffer
        if( isTracked( diffChunkDoc ) ) newTracked.push_back( diffChunkDoc.getOwned() );
    }

    LOG(3) << "found " << _validDiffs
           << " new chunks for collection " << _ns
           << " (tracking " << newTracked.size()
           << "), new version is " << *_maxVersion
           << endl;

    for( vector<BSONObj>::iterator it = newTracked.begin(); it != newTracked.end(); it++ ) {

        BSONObj chunkDoc = *it;

        // Important - we need to make sure we actually own the min and max here
        BSONObj min = chunkDoc[ChunkType::min()].Obj().getOwned();
        BSONObj max = chunkDoc[ChunkType::max()].Obj().getOwned();

        // Invariant enforced by sharding: it's possible to read inconsistent state b/c of
        // getMore() and yielding, so we want to detect it as early as possible.
        // TODO: This checks for overlap; we should also check for holes here iff we're
        // tracking all chunks
        if( isOverlapping( min, max ) ) return -1;

        _currMap->insert( rangeFor( chunkDoc, min, max ) );
    }

    return _validDiffs;
}
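// A hedged, standalone sketch of the range bookkeeping the differ relies on: ranges that conflict
// with a changed chunk are dropped before the new chunks are inserted, and any overlap among the
// new chunks aborts the load (the "return -1" path). Integer bounds stand in for BSON shard-key
// bounds here; this is an illustrative simplification, not the real RangeMap types.
#include <map>

typedef std::map<int, int> SimpleRangeMap;  // half-open ranges [min, max) keyed by min

// Remove every tracked range that intersects [min, max).
inline void removeOverlappingRanges(SimpleRangeMap& ranges, int min, int max) {
    SimpleRangeMap::iterator it = ranges.lower_bound(min);
    // A range starting before 'min' may still reach into [min, max).
    if (it != ranges.begin()) {
        SimpleRangeMap::iterator before = it;
        --before;
        if (before->second > min) it = before;
    }
    while (it != ranges.end() && it->first < max) {
        ranges.erase(it++);
    }
}

// True if [min, max) intersects any tracked range.
inline bool overlapsTracked(const SimpleRangeMap& ranges, int min, int max) {
    SimpleRangeMap::const_iterator it = ranges.lower_bound(min);
    if (it != ranges.begin()) {
        SimpleRangeMap::const_iterator before = it;
        --before;
        if (before->second > min) return true;
    }
    return it != ranges.end() && it->first < max;
}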
Esempio n. 26
0
void OperationShardingState::_clear() {
    _hasVersion = false;
    _shardVersion = ChunkVersion();
    _ns = NamespaceString();
}
Esempio n. 27
0
bool ChunkManager::_load(ChunkMap& chunkMap,
                         set<ShardId>& shardIds,
                         ShardVersionMap* shardVersions,
                         const ChunkManager* oldManager) {
    // Reset the max version, but not the epoch, when we aren't loading from the oldManager
    _version = ChunkVersion(0, 0, _version.epoch());

    // If we have a previous version of the ChunkManager to work from, use that info to reduce
    // our config query
    if (oldManager && oldManager->getVersion().isSet()) {
        // Get the old max version
        _version = oldManager->getVersion();

        // Load a copy of the old versions
        *shardVersions = oldManager->_shardVersions;

        // Load a copy of the chunk map, replacing the chunk manager with our own
        const ChunkMap& oldChunkMap = oldManager->getChunkMap();

        // Could be very expensive
        // TODO: If chunks were immutable and didn't reference the manager, we could do more
        // interesting things here
        for (const auto& oldChunkMapEntry : oldChunkMap) {
            shared_ptr<Chunk> oldC = oldChunkMapEntry.second;
            shared_ptr<Chunk> newC(new Chunk(
                this, oldC->getMin(), oldC->getMax(), oldC->getShardId(), oldC->getLastmod()));

            newC->setBytesWritten(oldC->getBytesWritten());

            chunkMap.insert(make_pair(oldC->getMax(), newC));
        }

        LOG(2) << "loading chunk manager for collection " << _ns
               << " using old chunk manager w/ version " << _version.toString() << " and "
               << oldChunkMap.size() << " chunks";
    }

    // Attach a diff tracker for the versioned chunk data
    CMConfigDiffTracker differ(this);
    differ.attach(_ns, chunkMap, _version, *shardVersions);

    // Diff tracker should *always* find at least one chunk if collection exists
    // Get the diff query required
    auto diffQuery = differ.configDiffQuery();

    std::vector<ChunkType> chunks;
    uassertStatusOK(
        grid.catalogManager()->getChunks(diffQuery.query, diffQuery.sort, boost::none, &chunks));

    int diffsApplied = differ.calculateConfigDiff(chunks);
    if (diffsApplied > 0) {
        LOG(2) << "loaded " << diffsApplied << " chunks into new chunk manager for " << _ns
               << " with version " << _version;

        // Add all existing shards we find to the shards set
        for (ShardVersionMap::iterator it = shardVersions->begin(); it != shardVersions->end();) {
            shared_ptr<Shard> shard = grid.shardRegistry()->getShard(it->first);
            if (shard) {
                shardIds.insert(it->first);
                ++it;
            } else {
                shardVersions->erase(it++);
            }
        }

        return true;
    } else if (diffsApplied == 0) {
        // No chunks were found for the ns
        warning() << "no chunks found when reloading " << _ns << ", previous version was "
                  << _version;

        // Set all our data to empty
        chunkMap.clear();
        shardVersions->clear();

        _version = ChunkVersion(0, 0, OID());

        return true;
    } else {  // diffsApplied < 0

        bool allInconsistent = (differ.numValidDiffs() == 0);
        if (allInconsistent) {
            // All versions are different, this can be normal
            warning() << "major change in chunk information found when reloading " << _ns
                      << ", previous version was " << _version;
        } else {
            // Inconsistent load halfway through (due to yielding cursor during load)
            // should be rare
            warning() << "inconsistent chunks found when reloading " << _ns
                      << ", previous version was " << _version << ", this should be rare";
        }

        // Set all our data to empty to be extra safe
        chunkMap.clear();
        shardVersions->clear();

        _version = ChunkVersion(0, 0, OID());

        return allInconsistent;
    }
}
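// A hedged, standalone sketch of the pruning idiom used above when dropping entries for shards
// that can no longer be resolved: std::map::erase invalidates only the erased iterator, so the
// post-increment inside erase() advances the iterator before invalidation. The names below are
// illustrative only, not the real ShardVersionMap / ShardRegistry types.
#include <map>
#include <set>
#include <string>

inline void pruneUnknownShards(std::map<std::string, int>& shardVersions,
                               const std::set<std::string>& knownShards) {
    for (std::map<std::string, int>::iterator it = shardVersions.begin();
         it != shardVersions.end();) {
        if (knownShards.count(it->first)) {
            ++it;
        } else {
            shardVersions.erase(it++);
        }
    }
}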
Esempio n. 28
0
 Status ShardingState::refreshMetadataNow( const string& ns, ChunkVersion* latestShardVersion )
 {
     return doRefreshMetadata( ns, ChunkVersion( 0, 0, OID() ), false, latestShardVersion );
 }
Esempio n. 29
0
template <class ValType, class ShardType>
int ConfigDiffTracker<ValType, ShardType>::calculateConfigDiff(
    const std::vector<ChunkType>& chunks) {
    _assertAttached();

    // Apply the chunk changes to the ranges and versions
    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //      per-collection, and remove any conflicting chunks from the ranges.
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on
    //      the shard for mongod) add them to the ranges.

    std::vector<ChunkType> newTracked;

    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;

    for (const ChunkType& chunk : chunks) {
        ChunkVersion chunkVersion =
            ChunkVersion::fromBSON(chunk.toBSON(), ChunkType::DEPRECATED_lastmod());

        if (!chunkVersion.isSet() || !chunkVersion.hasEqualEpoch(currEpoch)) {
            warning() << "got invalid chunk version " << chunkVersion << " in document "
                      << chunk.toString() << " when trying to load differing chunks at version "
                      << ChunkVersion(
                          _maxVersion->majorVersion(), _maxVersion->minorVersion(), currEpoch);

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if (chunkVersion > *_maxVersion) {
            *_maxVersion = chunkVersion;
        }

        // Chunk version changes
        ShardType shard = shardFor(chunk.getShard());

        typename MaxChunkVersionMap::const_iterator shardVersionIt = _maxShardVersions->find(shard);
        if (shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion) {
            (*_maxShardVersions)[shard] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking because of this
        // chunk's changes
        removeOverlapping(chunk.getMin(), chunk.getMax());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore
        // or unbuffer.
        if (isTracked(chunk)) {
            newTracked.push_back(chunk);
        }
    }

    LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns << " (tracking "
           << newTracked.size() << "), new version is " << *_maxVersion;

    for (const ChunkType& chunk : newTracked) {
        // Invariant enforced by sharding - it's possible to read inconsistent state due to
        // getMore and yielding, so we want to detect it as early as possible.
        //
        // TODO: This checks for overlap, we also should check for holes here iff we're
        // tracking all chunks.
        if (isOverlapping(chunk.getMin(), chunk.getMax())) {
            return -1;
        }

        _currMap->insert(rangeFor(chunk));
    }

    return _validDiffs;
}
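// A hedged, standalone sketch of step 1 of the algorithm above: a running maximum per shard plus
// an overall collection maximum. Pairs of (major, minor) stand in for ChunkVersion and compare
// lexicographically; this is an illustrative simplification, not the real type.
#include <map>
#include <string>
#include <utility>

typedef std::pair<unsigned int, unsigned int> SimpleVersion;  // (major, minor)

inline void noteChunkVersion(std::map<std::string, SimpleVersion>& maxShardVersions,
                             SimpleVersion& maxCollVersion,
                             const std::string& shard,
                             const SimpleVersion& chunkVersion) {
    if (maxCollVersion < chunkVersion) {
        maxCollVersion = chunkVersion;
    }
    std::map<std::string, SimpleVersion>::iterator it = maxShardVersions.find(shard);
    if (it == maxShardVersions.end() || it->second < chunkVersion) {
        maxShardVersions[shard] = chunkVersion;
    }
}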
Esempio n. 30
0
        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) ) 
                return false;

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
            }
            
            // Handle initial shard connection
            if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){

                result.append( "initialized", true );

                // Send back wire version to let mongos know what protocol we can speak
                result.append( "minWireVersion", minWireVersion );
                result.append( "maxWireVersion", maxWireVersion );

                return true;
            }

            // we can run on a slave up to here
            if ( ! isMaster( "admin" ) ) {
                result.append( "errmsg" , "not master" );
                result.append( "note" , "from post init in setShardVersion" );
                return false;
            }

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify namespace";
                return false;
            }

            if( ! ChunkVersion::canParseBSON( cmdObj, "version" ) ){
                errmsg = "need to specify version";
                return false;
            }

            const ChunkVersion version = ChunkVersion::fromBSON( cmdObj, "version" );
            
            // step 3

            const ChunkVersion oldVersion = info->getVersion(ns);
            const ChunkVersion globalVersion = shardingState.getVersion(ns);

            oldVersion.addToBSON( result, "oldVersion" );
            
            if ( globalVersion.isSet() && version.isSet() ) {
                // this means there is no reset in progress on either side,
                // so it's safe to make some assumptions

                if ( version.isWriteCompatibleWith( globalVersion ) ) {
                    // mongos and mongod agree!
                    if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                        if ( oldVersion < globalVersion &&
                             oldVersion.hasCompatibleEpoch(globalVersion) )
                        {
                            info->setVersion( ns , version );
                        }
                        else if ( authoritative ) {
                            // this means there was a drop and our version is reset
                            info->setVersion( ns , version );
                        }
                        else {
                            result.append( "ns" , ns );
                            result.appendBool( "need_authoritative" , true );
                            errmsg = "verifying drop on '" + ns + "'";
                            return false;
                        }
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
                // this had been reset
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
            }

            if ( ! version.isSet() && ! globalVersion.isSet() ) {
                // this connection is cleaning itself
                info->setVersion( ns , ChunkVersion( 0, OID() ) );
                return true;
            }

            // Cases below all either return OR fall-through to remote metadata reload.
            if ( version.isSet() || !globalVersion.isSet() ) {

                // Not Dropping

                // TODO: Refactor all of this
                if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
                    errmsg = "this connection already had a newer version of collection '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "newVersion" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    return false;
                }

                // TODO: Refactor all of this
                if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }
                    errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                    result.append( "ns" , ns );
                    version.addToBSON( result, "version" );
                    globalVersion.addToBSON( result, "globalVersion" );
                    result.appendBool( "reloadConfig" , true );
                    return false;
                }

                if ( ! globalVersion.isSet() && ! authoritative ) {
                    // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
                    // should require a reload.
                    while ( shardingState.inCriticalMigrateSection() ) {
                        log() << "waiting till out of critical section" << endl;
                        shardingState.waitTillNotInCriticalSection( 10 );
                    }

                    // need authoritative for first look
                    result.append( "ns" , ns );
                    result.appendBool( "need_authoritative" , true );
                    errmsg = "first time for collection '" + ns + "'";
                    return false;
                }

                // Fall through to metadata reload below
            }
            else {

                // Dropping

                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    globalVersion.addToBSON( result, "globalVersion" );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }

                // Fall through to metadata reload below
            }

            ChunkVersion currVersion;
            Status status = shardingState.refreshMetadataIfNeeded( ns, version, &currVersion );

            if (!status.isOK()) {

                // The reload itself was interrupted or confused here

                errmsg = str::stream() << "could not refresh metadata for " << ns
                                       << " with requested shard version " << version.toString()
                                       << ", stored shard version is " << currVersion.toString()
                                       << causedBy( status.reason() );

                warning() << errmsg << endl;

                result.append( "ns" , ns );
                version.addToBSON( result, "version" );
                currVersion.addToBSON( result, "globalVersion" );
                result.appendBool( "reloadConfig", true );

                return false;
            }
            else if ( !version.isWriteCompatibleWith( currVersion ) ) {

                // We reloaded a version that doesn't match the version mongos was trying to
                // set.

                errmsg = str::stream() << "requested shard version differs from"
                                       << " config shard version for " << ns
                                       << ", requested version is " << version.toString()
                                       << " but found version " << currVersion.toString();

                OCCASIONALLY warning() << errmsg << endl;

                // WARNING: the exact fields below are important for compatibility with mongos
                // version reload.

                result.append( "ns" , ns );
                currVersion.addToBSON( result, "globalVersion" );

                // If this was a reset of a collection or the last chunk moved out, inform mongos to
                // do a full reload.
                if (currVersion.epoch() != version.epoch() || !currVersion.isSet() ) {
                    result.appendBool( "reloadConfig", true );
                    // Zero-version also needed to trigger full mongos reload, sadly
                    // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
                    ChunkVersion( 0, 0, OID() ).addToBSON( result, "version" );
                    // For debugging
                    version.addToBSON( result, "origVersion" );
                }
                else {
                    version.addToBSON( result, "version" );
                }

                return false;
            }

            info->setVersion( ns , version );
            return true;
        }
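// A hedged, standalone sketch of the "write compatible" fast check used in step 3 above. It
// assumes the ChunkVersion convention visible in these snippets (the epoch identifies a
// collection incarnation, the major version moves on migrations, the minor version on splits)
// and models compatibility as "same epoch and same major version"; that rule is an assumption
// made for this sketch, not a verbatim copy of ChunkVersion::isWriteCompatibleWith.
#include <string>

struct SimpleChunkVersion {
    unsigned int majorVersion;
    unsigned int minorVersion;
    std::string epoch;

    SimpleChunkVersion() : majorVersion(0), minorVersion(0) {}

    bool isSet() const { return majorVersion != 0 || minorVersion != 0; }

    bool isWriteCompatibleWith(const SimpleChunkVersion& other) const {
        return epoch == other.epoch && majorVersion == other.majorVersion;
    }
};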