typename ConfigDiffTracker<ValType,ShardType>::RangeOverlap ConfigDiffTracker<ValType,ShardType>::
overlappingRange( const BSONObj& min, const BSONObj& max )
{
    // Returns a pair of iterators into the current range map delimiting the tracked
    // entries selected for the key interval described by min and max.  Which map lookup
    // is used depends on whether the map is indexed by min key or not.
    verifyAttached();

    if( ! isMinKeyIndexed() ) {
        // Not min-key indexed: upper_bound( min ) is the first entry whose key is strictly
        // greater than min, i.e. the first chunk that overlaps min.
        typename RangeMap::iterator firstEntry = _currMap->upper_bound( min );
        // upper_bound( max ) is the first entry whose key is strictly greater than max;
        // entries after it cannot overlap max.
        typename RangeMap::iterator pastEntry = _currMap->upper_bound( max );

        return RangeOverlap( firstEntry, pastEntry );
    }

    // Min-key indexed: lower_bound( min ) is the first entry whose key is >= min, so the
    // entry before it cannot overlap min.
    typename RangeMap::iterator firstEntry = _currMap->lower_bound( min );
    // lower_bound( max ) is the first entry whose key is >= max, so that entry itself
    // does not overlap max.
    typename RangeMap::iterator pastEntry = _currMap->lower_bound( max );

    return RangeOverlap( firstEntry, pastEntry );
}
int ConfigDiffTracker<ValType,ShardType>::
calculateConfigDiff( string config,
                     const set<ChunkVersion>& extraMinorVersions )
{
    // Builds the diff query, runs it against the given config server and applies the
    // resulting cursor via the cursor-based calculateConfigDiff overload.  Returns whatever
    // that overload returns; a DBException is re-thrown with extra context attached.
    verifyAttached();

    // The query describing which chunk documents we need to look at
    Query diffQuery = configDiffQuery( extraMinorVersions );

    ScopedDbConnection conn(config);

    try {

        // Last argument to query() - presumably the batch size; confirm against the
        // DBClientBase::query signature.  Debug builds use a tiny value.
        const int lastQueryArg = ( DEBUG_BUILD ? 2 : 1000000 );

        // Cursor over the chunk documents matching the diff query
        auto_ptr<DBClientCursor> cursor = conn->query(
                                              ChunkType::ConfigNS, diffQuery, 0, 0, 0, 0, lastQueryArg );
        verify( cursor.get() );

        int numDiffs = calculateConfigDiff( *cursor );

        // Only mark the connection as done once the cursor was fully processed
        conn.done();

        return numDiffs;
    }
    catch( DBException& e ) {
        // Expected only for connection problems - add context and propagate
        e.addContext( str::stream() << "could not calculate config difference for ns " << _ns << " on " << config );
        throw;
    }
}
Exemple #3
0
    Query ConfigDiffTracker<ValType,ShardType>::
        configDiffQuery( const set<ChunkVersion>& extraMinorVersions ) const
    {
        // Builds the query used to fetch changed chunk documents for this namespace:
        // every chunk whose lastmod is $gte the currently known max version, sorted by
        // ascending lastmod.  The extraMinorVersions argument is not consulted here.
        verifyAttached();

        //
        // Splits and merges currently bump the max version, so asking for everything at or
        // above our max version is enough to see them.
        //

        BSONObjBuilder filterB;
        filterB.append(ChunkType::ns(), _ns);
        {
            BSONObjBuilder lastmodB(filterB.subobjStart(ChunkType::DEPRECATED_lastmod()));
            lastmodB.appendTimestamp( "$gte", _maxVersion->toLong() );
            lastmodB.done();
        }

        //
        // NOTE: THE ASCENDING VERSION SORT IS REQUIRED FOR CONSISTENCY WHEN THE CURSOR YIELDS
        // WHILE CHUNKS ARE BEING MIGRATED.
        //
        // A chunk whose version changes always gets a higher version, so with this sort it
        // will show up *after* our current cursor position rather than being missed.
        //

        Query sortedQuery( filterB.obj() );
        sortedQuery.sort(BSON( "lastmod" << 1 ));

        LOG(2) << "major version query from " << *_maxVersion << " and over "
               << _maxShardVersions->size() << " shards is " << sortedQuery << endl;

        return sortedQuery;
    }
void ConfigDiffTracker<ValType,ShardType>::
removeOverlapping( const BSONObj& min, const BSONObj& max )
{
    // Erases from the current range map every entry that overlappingRange() selects
    // for the interval described by min and max.
    verifyAttached();

    RangeOverlap staleEntries = overlappingRange( min, max );

    // [first, second) is a half-open iterator range into _currMap
    _currMap->erase( staleEntries.first, staleEntries.second );
}
Query ConfigDiffTracker<ValType,ShardType>::
configDiffQuery( const set<ChunkVersion>& extraMinorVersions ) const
{
    // Builds the query used to load changed chunk documents for this namespace.
    //
    // extraMinorVersions: additional chunk versions whose minor-version changes (within the
    //                     same major version) should also be fetched.
    //
    // Returns a Query filtered on the namespace and - when the number of clauses is small
    // enough - on an $or of version ranges, sorted by ascending lastmod.
    verifyAttached();

    //
    // Basic idea behind the query is to find all the chunks $gte the current max version, and
    // then also update chunks that we need minor versions - splits and (2.0) max chunks on
    // shards
    //

    static const int maxMinorVersionClauses = 50;
    BSONObjBuilder queryB;

    int numStaleMinorClauses = extraMinorVersions.size() + _maxShardVersions->size();

#ifdef _DEBUG
    // In debug builds, randomly trigger full reloads to exercise both codepaths
    if( rand() % 2 ) numStaleMinorClauses = maxMinorVersionClauses;
#endif

    // The namespace filter always applies; with too many clauses it is the *only* filter,
    // which results in a full reload of the collection's chunks.
    queryB.append(ChunkType::ns(), _ns);

    //
    // If we have only a few minor versions to refresh, we can be more selective in our query
    //
    if( numStaleMinorClauses < maxMinorVersionClauses ) {

        //
        // Get any version changes higher than we know currently
        //
        BSONArrayBuilder queryOrB( queryB.subarrayStart( "$or" ) );
        {
            BSONObjBuilder queryNewB( queryOrB.subobjStart() );
            {
                BSONObjBuilder ts(queryNewB.subobjStart(ChunkType::DEPRECATED_lastmod()));
                // We should *always* pull at least a single chunk back, this lets us quickly
                // detect if our collection was unsharded (and most of the time if it was
                // resharded) in the meantime
                ts.appendTimestamp( "$gte", _maxVersion->toLong() );
                ts.done();
            }

            queryNewB.done();
        }

        // Get any shard version changes higher than we know currently
        // Needed since there could have been a split of the max version chunk of any shard
        // TODO: Ideally, we shouldn't care about these
        for( typename map<ShardType, ChunkVersion>::const_iterator it = _maxShardVersions->begin(); it != _maxShardVersions->end(); ++it ) {

            BSONObjBuilder queryShardB( queryOrB.subobjStart() );
            queryShardB.append(ChunkType::shard(), nameFrom( it->first ) );
            {
                BSONObjBuilder ts(queryShardB.subobjStart(ChunkType::DEPRECATED_lastmod()));
                ts.appendTimestamp( "$gt", it->second.toLong() );
                ts.done();
            }
            queryShardB.done();
        }

        // Get any minor version changes we've marked as interesting
        // TODO: Ideally we shouldn't care about these
        for( set<ChunkVersion>::const_iterator it = extraMinorVersions.begin(); it != extraMinorVersions.end(); ++it ) {

            BSONObjBuilder queryShardB( queryOrB.subobjStart() );
            {
                BSONObjBuilder ts(queryShardB.subobjStart(ChunkType::DEPRECATED_lastmod()));
                // Strictly after this version, but still below the next major version
                ts.appendTimestamp( "$gt", it->toLong() );
                ts.appendTimestamp( "$lt",
                                    ChunkVersion( it->majorVersion() + 1, 0, OID() ).toLong() );
                ts.done();
            }
            queryShardB.done();
        }

        queryOrB.done();
    }

    BSONObj query = queryB.obj();

    LOG(2) << "major version query from " << *_maxVersion << " and over "
           << _maxShardVersions->size() << " shards is " << query << endl;

    //
    // NOTE: IT IS IMPORTANT FOR CONSISTENCY THAT WE SORT BY ASC VERSION, TO HANDLE
    // CURSOR YIELDING BETWEEN CHUNKS BEING MIGRATED.
    //
    // This ensures that changes to chunk version (which will always be higher) will always
    // come *after* our current position in the chunk cursor.
    //

    Query queryObj(query);
    queryObj.sort(BSON( "lastmod" << 1 ));

    // Return the sorted Query itself - wrapping the raw filter object in a new Query here
    // would discard the sort specification applied above.
    return queryObj;
}
int ConfigDiffTracker<ValType,ShardType>::
calculateConfigDiff( DBClientCursorInterface& diffCursor )
{
    // Applies the chunk documents produced by diffCursor to the tracked ranges and versions.
    // Returns the number of valid chunk documents seen, or -1 if a document carries an
    // unusable version or a tracked chunk overlaps an existing range.
    verifyAttached();

    //
    // This works in two passes :
    // 1. Walk every chunk document from the cursor, bumping the per-collection and per-shard
    //    max versions and dropping any currently tracked ranges that conflict with it
    // 2. Insert the chunk documents we care about (everything for mongos, only chunks on the
    //    shard for mongod) into the ranges
    //

    vector<BSONObj> trackedDocs;
    // Capture the epoch up front - the max version may be replaced while we iterate
    OID startEpoch = _maxVersion->epoch();

    _validDiffs = 0;
    while( diffCursor.more() ) {

        BSONObj doc = diffCursor.next();

        ChunkVersion docVersion = ChunkVersion::fromBSON(doc, ChunkType::DEPRECATED_lastmod());

        const bool wellFormed = doc[ChunkType::min()].type() == Object &&
                                doc[ChunkType::max()].type() == Object &&
                                doc[ChunkType::shard()].type() == String;
        if( ! wellFormed ) {
            warning() << "got invalid chunk document " << doc
                      << " when trying to load differing chunks" << endl;
            continue;
        }

        const bool versionUsable = docVersion.isSet() && docVersion.hasCompatibleEpoch( startEpoch );
        if( ! versionUsable ) {

            warning() << "got invalid chunk version " << docVersion << " in document " << doc
                      << " when trying to load differing chunks at version "
                      << ChunkVersion( _maxVersion->toLong(), startEpoch ) << endl;

            // No point in continuing, the result would be broken anyway
            return -1;
        }

        _validDiffs++;

        // Track the highest version seen for the collection
        if( docVersion > *_maxVersion ) {
            *_maxVersion = docVersion;
        }

        // Track the highest version seen for the shard owning this chunk
        ShardType shard = shardFor( doc[ChunkType::shard()].String() );
        typename map<ShardType, ChunkVersion>::iterator knownShard = _maxShardVersions->find( shard );
        if( knownShard == _maxShardVersions->end() ) {
            (*_maxShardVersions)[ shard ] = docVersion;
        }
        else if( knownShard->second < docVersion ) {
            knownShard->second = docVersion;
        }

        // Anything we currently track that conflicts with this chunk's range has to go
        BSONObj docMin = doc[ChunkType::min()].Obj();
        BSONObj docMax = doc[ChunkType::max()].Obj();
        removeOverlapping( docMin, docMax );

        // Remember the chunks we need to track.  The copy must be owned, since the cursor
        // may getMore or release its buffer underneath us
        if( isTracked( doc ) ) {
            trackedDocs.push_back( doc.getOwned() );
        }
    }

    LOG(3) << "found " << _validDiffs
           << " new chunks for collection " << _ns
           << " (tracking " << trackedDocs.size()
           << "), new version is " << *_maxVersion
           << endl;

    for( size_t i = 0; i < trackedDocs.size(); ++i ) {

        BSONObj chunkDoc = trackedDocs[i];

        // min and max must be owned copies as well
        BSONObj min = chunkDoc[ChunkType::min()].Obj().getOwned();
        BSONObj max = chunkDoc[ChunkType::max()].Obj().getOwned();

        // Sharding guarantees that chunks do not overlap.  Because of getMore() and
        // yielding we may still have read an inconsistent state, so bail out as early as
        // possible if we detect one.
        // TODO: Only overlap is checked here; holes should be checked too iff we are
        // tracking all chunks
        if( isOverlapping( min, max ) ) {
            return -1;
        }

        _currMap->insert( rangeFor( chunkDoc, min, max ) );
    }

    return _validDiffs;
}
Exemple #7
0
    Query ConfigDiffTracker<ValType,ShardType>::
        configDiffQuery( const set<ShardChunkVersion>& extraMinorVersions ) const
    {
        // Builds the query used to load changed chunk documents for this namespace.  With
        // few enough clauses the query is an $or of per-version ranges; otherwise it simply
        // selects every chunk of the namespace.
        verifyAttached();

        //
        // The query looks for all chunks newer than the current max version, plus the chunks
        // for which we need minor version updates - splits and (2.0) max chunks on shards
        //

        static const int clauseLimit = 50;
        BSONObjBuilder filterB;

        int numClauses = extraMinorVersions.size() + _maxShardVersions->size();

#ifdef _DEBUG
        // Debug builds randomly force the full-reload path so both codepaths get exercised
        if( rand() % 2 ) numClauses = clauseLimit;
#endif

        if( numClauses >= clauseLimit ){

            //
            // Too many clauses for a sensible $or query - just fetch all chunks of the ns
            //

            filterB.append( "ns", _ns );
        }
        else{

            BSONArrayBuilder orB( filterB.subarrayStart( "$or" ) );

            //
            // Clause 1 : anything at or above the collection version we currently know
            //

            {
                BSONObjBuilder newerB( orB.subobjStart() );

                newerB.append( "ns", _ns );
                {
                    BSONObjBuilder lastmodB( newerB.subobjStart( "lastmod" ) );
                    // $gte means at least one chunk should *always* come back, which lets us
                    // quickly notice when the collection was unsharded (and usually when it
                    // was resharded) in the meantime
                    lastmodB.appendTimestamp( "$gte", _maxVersion->toLong() );
                    lastmodB.done();
                }

                newerB.done();
            }

            // One clause per known shard : anything above the shard version we currently know.
            // Required because the max version chunk of any shard may have been split
            // TODO: Ideally, we shouldn't care about these
            typedef typename map<ShardType, ShardChunkVersion>::const_iterator ShardVersionIt;
            for( ShardVersionIt shardIt = _maxShardVersions->begin(); shardIt != _maxShardVersions->end(); ++shardIt ){
                BSONObjBuilder shardB( orB.subobjStart() );

                shardB.append( "ns", _ns );
                shardB.append( "shard", nameFrom( shardIt->first ) );
                {
                    BSONObjBuilder lastmodB( shardB.subobjStart( "lastmod" ) );
                    lastmodB.appendTimestamp( "$gt", shardIt->second.toLong() );
                    lastmodB.done();
                }
                shardB.done();
            }

            // One clause per extra minor version we were asked to look at : anything above
            // that version but below the next major version
            // TODO: Ideally we shouldn't care about these
            for( set<ShardChunkVersion>::const_iterator minorIt = extraMinorVersions.begin(); minorIt != extraMinorVersions.end(); ++minorIt ){
                BSONObjBuilder minorB( orB.subobjStart() );

                minorB.append( "ns", _ns );
                {
                    BSONObjBuilder lastmodB( minorB.subobjStart( "lastmod" ) );
                    lastmodB.appendTimestamp( "$gt", minorIt->toLong() );
                    lastmodB.appendTimestamp( "$lt",
                                              ShardChunkVersion( minorIt->majorVersion() + 1, 0, OID() ).toLong() );
                    lastmodB.done();
                }
                minorB.done();
            }

            orB.done();
        }

        BSONObj filter = filterB.obj();

        // log() << "major version query from " << *_maxVersion << " and over " << _maxShardVersions->size() << " shards is " << filter << endl;

        return Query( filter );
    }