Example #1
0
        void insertSharded( DBConfigPtr conf, const char* ns, BSONObj& o, int flags ) {
            ChunkManagerPtr manager = conf->getChunkManager(ns);
            if ( ! manager->hasShardKey( o ) ) {

                bool bad = true;

                if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
                    BSONObjBuilder b;
                    b.appendOID( "_id" , 0 , true );
                    b.appendElements( o );
                    o = b.obj();
                    bad = ! manager->hasShardKey( o );
                }

                if ( bad ) {
                    log() << "tried to insert object without shard key: " << ns << "  " << o << endl;
                    uasserted( 14842 , "tried to insert object without shard key" );
                }

            }

            // Many operations benefit from having the shard key early in the object
            o = manager->getShardKey().moveToFront(o);

            const int maxTries = 30;

            for ( int i=0; i<maxTries; i++ ) {
                try {
                    ChunkPtr c = manager->findChunk( o );
                    log(4) << "  server:" << c->getShard().toString() << " " << o << endl;
                    insert( c->getShard() , ns , o , flags);

//                    r.gotInsert();
//                    if ( r.getClientInfo()->autoSplitOk() )
                        c->splitIfShould( o.objsize() );
                    break;
                }
                catch ( StaleConfigException& e ) {
                    int logLevel = i < ( maxTries / 2 );
                    LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl;
//                    r.reset();

                    unsigned long long old = manager->getSequenceNumber();
                    manager = conf->getChunkManager(ns);

                    if (!manager) {
                        uasserted(14843, "collection no longer sharded");
                    }

                    LOG( logLevel ) << "  sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl;
                }
                sleepmillis( i * 20 );
            }
        }
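
The insert path above retries on StaleConfigException with a linearly growing sleep, and the same control flow recurs in several of the later examples. The sketch below isolates that pattern in standard C++ only; StaleConfig, attemptWrite and reloadRouting are hypothetical stand-ins for the mongos types and calls, not the real API.

    #include <chrono>
    #include <stdexcept>
    #include <thread>

    // Hypothetical stand-in for mongos' StaleConfigException.
    struct StaleConfig : std::runtime_error {
        StaleConfig() : std::runtime_error("stale config") {}
    };

    // attemptWrite routes and sends one write; reloadRouting refreshes chunk metadata.
    bool retryOnStaleConfig(void (*attemptWrite)(), void (*reloadRouting)()) {
        const int maxTries = 30;
        for (int i = 0; i < maxTries; i++) {
            try {
                attemptWrite();             // route and send the write
                return true;                // succeeded; stop retrying
            }
            catch (const StaleConfig&) {
                reloadRouting();            // stale chunk map: refresh, then try again
            }
            // back off a little longer on each retry: 0ms, 20ms, 40ms, ...
            std::this_thread::sleep_for(std::chrono::milliseconds(i * 20));
        }
        return false;                       // gave up after maxTries attempts
    }
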
Example #2
0
        void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) {

            while ( d.moreJSObjs() ) {
                BSONObj o = d.nextJsObj();
                if ( ! manager->hasShardKey( o ) ) {

                    bool bad = true;

                    if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
                        BSONObjBuilder b;
                        b.appendOID( "_id" , 0 , true );
                        b.appendElements( o );
                        o = b.obj();
                        bad = ! manager->hasShardKey( o );
                    }

                    if ( bad ) {
                        log() << "tried to insert object without shard key: " << r.getns() << "  " << o << endl;
                        throw UserException( 8011 , "tried to insert object without shard key" );
                    }

                }

                // Many operations benefit from having the shard key early in the object
                o = manager->getShardKey().moveToFront(o);

                const int maxTries = 10;

                bool gotThrough = false;
                for ( int i=0; i<maxTries; i++ ) {
                    try {
                        ChunkPtr c = manager->findChunk( o );
                        log(4) << "  server:" << c->getShard().toString() << " " << o << endl;
                        insert( c->getShard() , r.getns() , o );

                        r.gotInsert();
                        if ( r.getClientInfo()->autoSplitOk() )
                            c->splitIfShould( o.objsize() );
                        gotThrough = true;
                        break;
                    }
                    catch ( StaleConfigException& e ) {
                        log( i < ( maxTries / 2 ) ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl;
                        r.reset();
                        manager = r.getChunkManager();
                        uassert(14804, "collection no longer sharded", manager);
                    }
                    sleepmillis( i * 200 );
                }
                
                assert( inShutdown() || gotThrough );
            }
        }
Example #3
0
    void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ) {
        int flags = d.pullInt();

        BSONObj query = d.nextJsObj();
        uassert( 13506 ,  "$atomic not supported sharded" , query["$atomic"].eoo() );
        uassert( 10201 ,  "invalid update" , d.moreJSObjs() );
        BSONObj toupdate = d.nextJsObj();

        BSONObj chunkFinder = query;

        bool upsert = flags & UpdateOption_Upsert;
        bool multi = flags & UpdateOption_Multi;

        uassert( 10202 ,  "can't mix multi and upsert and sharding" , ! ( upsert && multi ) );

        if (upsert) {
            uassert(8012, "can't upsert something without shard key",
                    (manager->hasShardKey(toupdate) ||
                     (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query))));

            BSONObj key = manager->getShardKey().extractKey(query);
            BSONForEach(e, key) {
                uassert(13465, "shard key in upsert query must be an exact match", getGtLtOp(e) == BSONObj::Equality);
            }
        }
Example #4
0
        void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){
            
            while ( d.moreJSObjs() ){
                BSONObj o = d.nextJsObj();
                if ( ! manager->hasShardKey( o ) ){

                    bool bad = true;

                    if ( manager->getShardKey().partOfShardKey( "_id" ) ){
                        BSONObjBuilder b;
                        b.appendOID( "_id" , 0 , true );
                        b.appendElements( o );
                        o = b.obj();
                        bad = ! manager->hasShardKey( o );
                    }
                    
                    if ( bad ){
                        log() << "tried to insert object without shard key: " << r.getns() << "  " << o << endl;
                        throw UserException( 8011 , "tried to insert object without shard key" );
                    }
                    
                }
                
                ChunkPtr c = manager->findChunk( o );
                log(4) << "  server:" << c->getShard().toString() << " " << o << endl;
                insert( c->getShard() , r.getns() , o );

                r.gotInsert();
                
                c->splitIfShould( o.objsize() );
            }            
        }
Example #5
0
        void _groupInserts( ChunkManagerPtr manager, vector<BSONObj>& inserts, map<ChunkPtr,vector<BSONObj> >& insertsForChunks ){

            // Redo all inserts for chunks which have changed
            map<ChunkPtr,vector<BSONObj> >::iterator i = insertsForChunks.begin();
            while( ! insertsForChunks.empty() && i != insertsForChunks.end() ){
                if( ! manager->compatibleWith( i->first ) ){
                    inserts.insert( inserts.end(), i->second.begin(), i->second.end() );
                    insertsForChunks.erase( i++ );
                }
                else ++i;
            }

            // Figure out inserts we haven't chunked yet
            for( vector<BSONObj>::iterator i = inserts.begin(); i != inserts.end(); ++i ){

                BSONObj o = *i;

                if ( ! manager->hasShardKey( o ) ) {

                    bool bad = true;

                    // Add autogenerated _id to item and see if we now have a shard key
                    if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
                        BSONObjBuilder b;
                        b.appendOID( "_id" , 0 , true );
                        b.appendElements( o );
                        o = b.obj();
                        bad = ! manager->hasShardKey( o );
                    }

                    if ( bad ) {
                        // TODO:
                        log() << "tried to insert object with no valid shard key for " << manager->getShardKey() << " : " << o << endl;
                        uassert( 8011, str::stream() << "tried to insert object with no valid shard key for " << manager->getShardKey().toString() << " : " << o.toString(), false );
                    }
                }

                // Many operations benefit from having the shard key early in the object
                o = manager->getShardKey().moveToFront(o);
                insertsForChunks[manager->findChunk(o)].push_back(o);
            }

            inserts.clear();
        }
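
The grouping pass in _groupInserts() is essentially a bucket-by-routing-target step. Below is a minimal self-contained sketch of the same idea in standard C++, with a hypothetical routeToChunk() standing in for ChunkManager::findChunk() and plain strings standing in for BSON documents.

    #include <map>
    #include <string>
    #include <vector>

    // Hypothetical: derive a chunk identifier from a document's shard-key value.
    std::string routeToChunk(const std::string& doc) {
        return doc.empty() ? std::string("chunk-min") : std::string("chunk-") + doc[0];
    }

    // Bucket each pending document by the chunk it routes to, then clear the
    // input, mirroring what _groupInserts() does with insertsForChunks.
    void groupInserts(std::vector<std::string>& inserts,
                      std::map<std::string, std::vector<std::string> >& insertsForChunks) {
        for (size_t i = 0; i < inserts.size(); ++i)
            insertsForChunks[routeToChunk(inserts[i])].push_back(inserts[i]);
        inserts.clear();
    }
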
Example #6
0
        void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){
            
            while ( d.moreJSObjs() ){
                BSONObj o = d.nextJsObj();
                if ( ! manager->hasShardKey( o ) ){

                    bool bad = true;

                    if ( manager->getShardKey().partOfShardKey( "_id" ) ){
                        BSONObjBuilder b;
                        b.appendOID( "_id" , 0 , true );
                        b.appendElements( o );
                        o = b.obj();
                        bad = ! manager->hasShardKey( o );
                    }
                    
                    if ( bad ){
                        log() << "tried to insert object without shard key: " << r.getns() << "  " << o << endl;
                        throw UserException( 8011 , "tried to insert object without shard key" );
                    }
                    
                }
                
                bool gotThrough = false;
                for ( int i=0; i<10; i++ ){
                    try {
                        ChunkPtr c = manager->findChunk( o );
                        log(4) << "  server:" << c->getShard().toString() << " " << o << endl;
                        insert( c->getShard() , r.getns() , o );
                        
                        r.gotInsert();
                        c->splitIfShould( o.objsize() );
                        gotThrough = true;
                        break;
                    }
                    catch ( StaleConfigException& ){
                        log(1) << "retrying insert because of StaleConfigException: " << o << endl;
                        r.reset();
                        manager = r.getChunkManager();
                    }
                    sleepmillis( i * 200 );
                }

                assert( gotThrough );

            }            
        }
Example #7
0
            bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

                if ( ! okForConfigChanges( errmsg ) )
                    return false;

                ShardConnection::sync();

                string ns = cmdObj.firstElement().valuestrsafe();
                if ( ns.size() == 0 ) {
                    errmsg = "no ns";
                    return false;
                }

                DBConfigPtr config = grid.getDBConfig( ns );
                if ( ! config->isSharded( ns ) ) {
                    config->reload();
                    if ( ! config->isSharded( ns ) ) {
                        errmsg = "ns not sharded.  have to shard before can split";
                        return false;
                    }
                }

                BSONObj find = cmdObj.getObjectField( "find" );
                if ( find.isEmpty() ) {
                    find = cmdObj.getObjectField( "middle" );

                    if ( find.isEmpty() ) {
                        errmsg = "need to specify find or middle";
                        return false;
                    }
                }

                ChunkManagerPtr info = config->getChunkManager( ns );
                ChunkPtr chunk = info->findChunk( find );
                BSONObj middle = cmdObj.getObjectField( "middle" );

                assert( chunk.get() );
                log() << "splitting: " << ns << "  shard: " << chunk << endl;

                BSONObj res;
                bool worked;
                if ( middle.isEmpty() ) {
                    BSONObj ret = chunk->singleSplit( true /* force a split even if not enough data */ , res );
                    worked = !ret.isEmpty();
                }
                else {
                    // sanity check if the key provided is a valid split point
                    if ( ( middle == chunk->getMin() ) || ( middle == chunk->getMax() ) ) {
                        errmsg = "cannot split on initial or final chunk's key";
                        return false;
                    }

                    if (!fieldsMatch(middle, info->getShardKey().key())){
                        errmsg = "middle has different fields (or different order) than shard key";
                        return false;
                    }

                    vector<BSONObj> splitPoints;
                    splitPoints.push_back( middle );
                    worked = chunk->multiSplit( splitPoints , res );
                }

                if ( !worked ) {
                    errmsg = "split failed";
                    result.append( "cause" , res );
                    return false;
                }
                config->getChunkManager( ns , true );
                return true;
            }
Example #8
0
        void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ){
            int flags = d.pullInt();
            
            BSONObj query = d.nextJsObj();
            uassert( 10201 ,  "invalid update" , d.moreJSObjs() );
            BSONObj toupdate = d.nextJsObj();

            BSONObj chunkFinder = query;
            
            bool upsert = flags & UpdateOption_Upsert;
            bool multi = flags & UpdateOption_Multi;

            uassert( 10202 ,  "can't mix multi and upsert and sharding" , ! ( upsert && multi ) );

            if ( upsert && !(manager->hasShardKey(toupdate) ||
                             (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query))))
            {
                throw UserException( 8012 , "can't upsert something without shard key" );
            }

            bool save = false;
            if ( ! manager->hasShardKey( query ) ){
                if ( multi ){
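                    // nothing to check here: a multi update is allowed without the shard key
                    // and is broadcast to the matching shards below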
                }
                else if ( strcmp( query.firstElement().fieldName() , "_id" ) || query.nFields() != 1 ){
                    throw UserException( 8013 , "can't do non-multi update with query that doesn't have the shard key" );
                }
                else {
                    save = true;
                    chunkFinder = toupdate;
                }
            }

            
            if ( ! save ){
                if ( toupdate.firstElement().fieldName()[0] == '$' ){
                    BSONObjIterator ops(toupdate);
                    while(ops.more()){
                        BSONElement op(ops.next());
                        if (op.type() != Object)
                            continue;
                        BSONObjIterator fields(op.embeddedObject());
                        while(fields.more()){
                            const string field = fields.next().fieldName();
                            uassert(13123, "Can't modify shard key's value", ! manager->getShardKey().partOfShardKey(field));
                        }
                    }
                } else if ( manager->hasShardKey( toupdate ) ){
                    uassert( 8014, "change would move shards!", manager->getShardKey().compare( query , toupdate ) == 0 );
                } else {
                    uasserted(12376, "shard key must be in update object");
                }
            }
            
            if ( multi ){
                set<Shard> shards;
                manager->getShardsForQuery( shards , chunkFinder );
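                // set the Broadcast bit directly in the forwarded update message's flags before sending to each shard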
                int * x = (int*)(r.d().afterNS());
                x[0] |= UpdateOption_Broadcast;
                for ( set<Shard>::iterator i=shards.begin(); i!=shards.end(); i++){
                    doWrite( dbUpdate , r , *i , false );
                }
            }
            else {
                int left = 5;
                while ( true ){
                    try {
                        ChunkPtr c = manager->findChunk( chunkFinder );
                        doWrite( dbUpdate , r , c->getShard() );
                        c->splitIfShould( d.msg().header()->dataLen() );
                        break;
                    }
                    catch ( StaleConfigException& e ){
                        if ( left <= 0 )
                            throw e;
                        left--;
                        log() << "update failed b/c of StaleConfigException, retrying " 
                              << " left:" << left << " ns: " << r.getns() << " query: " << query << endl;
                        r.reset( false );
                        manager = r.getChunkManager();
                    }
                }
            }

        }
Example #9
0
        void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) {
            const int flags = d.reservedField() | InsertOption_ContinueOnError; // ContinueOnError is always on when using sharding.
            map<ChunkPtr, vector<BSONObj> > insertsForChunk; // Group bulk insert for appropriate shards
            try {
                while ( d.moreJSObjs() ) {
                    BSONObj o = d.nextJsObj();
                    if ( ! manager->hasShardKey( o ) ) {

                        bool bad = true;

                        if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
                            BSONObjBuilder b;
                            b.appendOID( "_id" , 0 , true );
                            b.appendElements( o );
                            o = b.obj();
                            bad = ! manager->hasShardKey( o );
                        }

                        if ( bad ) {
                            log() << "tried to insert object with no valid shard key: " << r.getns() << "  " << o << endl;
                            uasserted( 8011 , "tried to insert object with no valid shard key" );
                        }

                    }

                    // Many operations benefit from having the shard key early in the object
                    o = manager->getShardKey().moveToFront(o);
                    insertsForChunk[manager->findChunk(o)].push_back(o);
                }
                for (map<ChunkPtr, vector<BSONObj> >::iterator it = insertsForChunk.begin(); it != insertsForChunk.end(); ++it) {
                    ChunkPtr c = it->first;
                    vector<BSONObj> objs = it->second;
                    const int maxTries = 30;

                    bool gotThrough = false;
                    for ( int i=0; i<maxTries; i++ ) {
                        try {
                            LOG(4) << "  server:" << c->getShard().toString() << " bulk insert " << objs.size() << " documents" << endl;
                            insert( c->getShard() , r.getns() , objs , flags);

                            int bytesWritten = 0;
                            for (vector<BSONObj>::iterator vecIt = objs.begin(); vecIt != objs.end(); ++vecIt) {
                                r.gotInsert(); // Record the correct number of individual inserts
                                bytesWritten += (*vecIt).objsize();
                            }
                            if ( r.getClientInfo()->autoSplitOk() )
                                c->splitIfShould( bytesWritten );
                            gotThrough = true;
                            break;
                        }
                        catch ( StaleConfigException& e ) {
                            int logLevel = i < ( maxTries / 2 );
                            LOG( logLevel ) << "retrying bulk insert of " << objs.size() << " documents because of StaleConfigException: " << e << endl;
                            r.reset();

                            unsigned long long old = manager->getSequenceNumber();
                            manager = r.getChunkManager();
                            if( ! manager ) {
                                uasserted(14804, "collection no longer sharded");
                            }

                            LOG( logLevel ) << "  sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl;
                        }
                        sleepmillis( i * 20 );
                    }

                    assert( inShutdown() || gotThrough ); // not caught below
                }
            } catch (const UserException&){
                if (!d.moreJSObjs()){
                    throw;
                }
                // Ignore and keep going. ContinueOnError is implied with sharding.
            }
        }
Example #10
0
    void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
        verify( candidateChunks );

        //
        // 1. Check whether there is any sharded collection to be balanced by querying
        // the ShardsNS::collections collection
        //

        auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj());

        if ( NULL == cursor.get() ) {
            warning() << "could not query " << CollectionType::ConfigNS
                      << " while trying to balance" << endl;
            return;
        }

        vector< string > collections;
        while ( cursor->more() ) {
            BSONObj col = cursor->nextSafe();

            // sharded collections will have a shard "key".
            if ( ! col[CollectionType::keyPattern()].eoo() &&
                 ! col[CollectionType::noBalance()].trueValue() ){
                collections.push_back( col[CollectionType::ns()].String() );
            }
            else if( col[CollectionType::noBalance()].trueValue() ){
                LOG(1) << "not balancing collection " << col[CollectionType::ns()].String()
                       << ", explicitly disabled" << endl;
            }

        }
        cursor.reset();

        if ( collections.empty() ) {
            LOG(1) << "no collections to balance" << endl;
            return;
        }

        //
        // 2. Get a list of all the shards that are participating in this balance round
        // along with any maximum allowed quotas and current utilization. We get the
        // latter by issuing db.serverStatus() (mem.mapped) to all shards.
        //
        // TODO: skip unresponsive shards and mark information as stale.
        //

        ShardInfoMap shardInfo;
        Status loadStatus = DistributionStatus::populateShardInfoMap(&shardInfo);

        if (!loadStatus.isOK()) {
            warning() << "failed to load shard metadata" << causedBy(loadStatus) << endl;
            return;
        }

        if (shardInfo.size() < 2) {
            LOG(1) << "can't balance without more active shards" << endl;
            return;
        }

        OCCASIONALLY warnOnMultiVersion( shardInfo );

        //
        // 3. For each collection, check if the balancing policy recommends moving anything around.
        //

        for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
            const string& ns = *it;

            OwnedPointerMap<string, OwnedPointerVector<ChunkType> > shardToChunksMap;
            cursor = conn.query(ChunkType::ConfigNS,
                                QUERY(ChunkType::ns(ns)).sort(ChunkType::min()));

            set<BSONObj> allChunkMinimums;

            while ( cursor->more() ) {
                BSONObj chunkDoc = cursor->nextSafe().getOwned();

                auto_ptr<ChunkType> chunk(new ChunkType());
                string errmsg;
                if (!chunk->parseBSON(chunkDoc, &errmsg)) {
                    error() << "bad chunk format for " << chunkDoc
                            << ": " << errmsg << endl;
                    return;
                }

                allChunkMinimums.insert(chunk->getMin().getOwned());
                OwnedPointerVector<ChunkType>*& chunkList =
                        shardToChunksMap.mutableMap()[chunk->getShard()];

                if (chunkList == NULL) {
                    chunkList = new OwnedPointerVector<ChunkType>();
                }

                chunkList->mutableVector().push_back(chunk.release());
            }
            cursor.reset();

            if (shardToChunksMap.map().empty()) {
                LOG(1) << "skipping empty collection (" << ns << ")";
                continue;
            }

            for (ShardInfoMap::const_iterator i = shardInfo.begin(); i != shardInfo.end(); ++i) {
                // this just makes sure there is an entry in shardToChunksMap for every shard
                OwnedPointerVector<ChunkType>*& chunkList =
                        shardToChunksMap.mutableMap()[i->first];

                if (chunkList == NULL) {
                    chunkList = new OwnedPointerVector<ChunkType>();
                }
            }

            DistributionStatus status(shardInfo, shardToChunksMap.map());

            // load tags
            Status result = clusterCreateIndex(TagsType::ConfigNS,
                                               BSON(TagsType::ns() << 1 << TagsType::min() << 1),
                                               true, // unique
                                               WriteConcernOptions::AllConfigs,
                                               NULL);

            if ( !result.isOK() ) {
                warning() << "could not create index tags_1_min_1: " << result.reason() << endl;
                continue;
            }

            cursor = conn.query(TagsType::ConfigNS,
                                QUERY(TagsType::ns(ns)).sort(TagsType::min()));

            vector<TagRange> ranges;

            while ( cursor->more() ) {
                BSONObj tag = cursor->nextSafe();
                TagRange tr(tag[TagsType::min()].Obj().getOwned(),
                            tag[TagsType::max()].Obj().getOwned(),
                            tag[TagsType::tag()].String());
                ranges.push_back(tr);
                uassert(16356,
                        str::stream() << "tag ranges not valid for: " << ns,
                        status.addTagRange(tr) );

            }
            cursor.reset();

            DBConfigPtr cfg = grid.getDBConfig( ns );
            if ( !cfg ) {
                warning() << "could not load db config to balance " << ns << " collection" << endl;
                continue;
            }

            // This line reloads the chunk manager once if this process doesn't know the collection
            // is sharded yet.
            ChunkManagerPtr cm = cfg->getChunkManagerIfExists( ns, true );
            if ( !cm ) {
                warning() << "could not load chunks to balance " << ns << " collection" << endl;
                continue;
            }

            // loop through tags to make sure no chunk spans tags; splits on tag min. for all chunks
            bool didAnySplits = false;
            for ( unsigned i = 0; i < ranges.size(); i++ ) {
                BSONObj min = ranges[i].min;

                min = cm->getShardKey().extendRangeBound( min, false );

                if ( allChunkMinimums.count( min ) > 0 )
                    continue;

                didAnySplits = true;

                log() << "ns: " << ns << " need to split on "
                      << min << " because there is a range there" << endl;

                ChunkPtr c = cm->findIntersectingChunk( min );

                vector<BSONObj> splitPoints;
                splitPoints.push_back( min );

                BSONObj res;
                if ( !c->multiSplit( splitPoints, res ) ) {
                    error() << "split failed: " << res << endl;
                }
                else {
                    LOG(1) << "split worked: " << res << endl;
                }
                break;
            }

            if ( didAnySplits ) {
                // state change, just wait till next round
                continue;
            }

            CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime );
            if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
        }
    }
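
The tag loop near the end of _doBalanceRound() exists because chunk boundaries have to line up with tag-range boundaries before tag-aware balancing can do anything useful. Here is a standalone sketch of that check, using plain strings where the real code compares BSON range bounds; the names are illustrative, not the mongos API.

    #include <set>
    #include <string>
    #include <vector>

    // Return the first tag-range minimum that is not already a chunk boundary --
    // the point the balancer would ask to split on -- or an empty string if the
    // chunk boundaries already align with every tag range.
    std::string firstMissingTagBoundary(const std::set<std::string>& allChunkMinimums,
                                        const std::vector<std::string>& tagRangeMinimums) {
        for (size_t i = 0; i < tagRangeMinimums.size(); ++i) {
            if (allChunkMinimums.count(tagRangeMinimums[i]) == 0)
                return tagRangeMinimums[i];   // a chunk spans this tag boundary: split here
        }
        return std::string();
    }
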
Example #11
0
    void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
        verify( candidateChunks );

        //
        // 1. Check whether there is any sharded collection to be balanced by querying
        // the ShardsNS::collections collection
        //

        auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj());
        vector< string > collections;
        while ( cursor->more() ) {
            BSONObj col = cursor->nextSafe();

            // sharded collections will have a shard "key".
            if ( ! col[CollectionType::keyPattern()].eoo() &&
                 ! col[CollectionType::noBalance()].trueValue() ){
                collections.push_back( col[CollectionType::ns()].String() );
            }
            else if( col[CollectionType::noBalance()].trueValue() ){
                LOG(1) << "not balancing collection " << col[CollectionType::ns()].String()
                       << ", explicitly disabled" << endl;
            }

        }
        cursor.reset();

        if ( collections.empty() ) {
            LOG(1) << "no collections to balance" << endl;
            return;
        }

        //
        // 2. Get a list of all the shards that are participating in this balance round
        // along with any maximum allowed quotas and current utilization. We get the
        // latter by issuing db.serverStatus() (mem.mapped) to all shards.
        //
        // TODO: skip unresponsive shards and mark information as stale.
        //

        vector<Shard> allShards;
        Shard::getAllShards( allShards );
        if ( allShards.size() < 2) {
            LOG(1) << "can't balance without more active shards" << endl;
            return;
        }
        
        ShardInfoMap shardInfo;
        for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) {
            const Shard& s = *it;
            ShardStatus status = s.getStatus();
            shardInfo[ s.getName() ] = ShardInfo( s.getMaxSize(),
                                                  status.mapped(),
                                                  s.isDraining(),
                                                  status.hasOpsQueued(),
                                                  s.tags()
                                                  );
        }

        //
        // 3. For each collection, check if the balancing policy recommends moving anything around.
        //

        for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
            const string& ns = *it;

            map< string,vector<BSONObj> > shardToChunksMap;
            cursor = conn.query(ChunkType::ConfigNS,
                                QUERY(ChunkType::ns(ns)).sort(ChunkType::min()));

            set<BSONObj> allChunkMinimums;

            while ( cursor->more() ) {
                BSONObj chunk = cursor->nextSafe().getOwned();
                vector<BSONObj>& chunks = shardToChunksMap[chunk[ChunkType::shard()].String()];
                allChunkMinimums.insert( chunk[ChunkType::min()].Obj() );
                chunks.push_back( chunk );
            }
            cursor.reset();

            if (shardToChunksMap.empty()) {
                LOG(1) << "skipping empty collection (" << ns << ")";
                continue;
            }

            for ( vector<Shard>::iterator i=allShards.begin(); i!=allShards.end(); ++i ) {
                // this just makes sure there is an entry in shardToChunksMap for every shard
                Shard s = *i;
                shardToChunksMap[s.getName()].size();
            }

            DistributionStatus status( shardInfo, shardToChunksMap );

            // load tags
            conn.ensureIndex(TagsType::ConfigNS,
                             BSON(TagsType::ns() << 1 << TagsType::min() << 1),
                             true);

            cursor = conn.query(TagsType::ConfigNS,
                                QUERY(TagsType::ns(ns)).sort(TagsType::min()));

            vector<TagRange> ranges;

            while ( cursor->more() ) {
                BSONObj tag = cursor->nextSafe();
                TagRange tr(tag[TagsType::min()].Obj().getOwned(),
                            tag[TagsType::max()].Obj().getOwned(),
                            tag[TagsType::tag()].String());
                ranges.push_back(tr);
                uassert(16356,
                        str::stream() << "tag ranges not valid for: " << ns,
                        status.addTagRange(tr) );

            }
            cursor.reset();

            DBConfigPtr cfg = grid.getDBConfig( ns );
            verify( cfg );
            ChunkManagerPtr cm = cfg->getChunkManager( ns );
            verify( cm );

            // loop through tags to make sure no chunk spans tags; splits on tag min. for all chunks
            bool didAnySplits = false;
            for ( unsigned i = 0; i < ranges.size(); i++ ) {
                BSONObj min = ranges[i].min;

                min = cm->getShardKey().extendRangeBound( min, false );

                if ( allChunkMinimums.count( min ) > 0 )
                    continue;

                didAnySplits = true;

                log() << "ns: " << ns << " need to split on "
                      << min << " because there is a range there" << endl;

                ChunkPtr c = cm->findIntersectingChunk( min );

                vector<BSONObj> splitPoints;
                splitPoints.push_back( min );

                BSONObj res;
                if ( !c->multiSplit( splitPoints, res ) ) {
                    error() << "split failed: " << res << endl;
                }
                else {
                    LOG(1) << "split worked: " << res << endl;
                }
                break;
            }

            if ( didAnySplits ) {
                // state change, just wait till next round
                continue;
            }

            CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime );
            if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
        }
    }
Example #12
0
        void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) {
            const int flags = d.reservedField();
            bool keepGoing = flags & InsertOption_KeepGoing; // modified before assertion if should abort

            while ( d.moreJSObjs() ) {
                try {
                    BSONObj o = d.nextJsObj();
                    if ( ! manager->hasShardKey( o ) ) {

                        bool bad = true;

                        if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
                            BSONObjBuilder b;
                            b.appendOID( "_id" , 0 , true );
                            b.appendElements( o );
                            o = b.obj();
                            bad = ! manager->hasShardKey( o );
                        }

                        if ( bad ) {
                            log() << "tried to insert object without shard key: " << r.getns() << "  " << o << endl;
                            uasserted( 8011 , "tried to insert object without shard key" );
                        }

                    }

                    // Many operations benefit from having the shard key early in the object
                    o = manager->getShardKey().moveToFront(o);

                    const int maxTries = 30;

                    bool gotThrough = false;
                    for ( int i=0; i<maxTries; i++ ) {
                        try {
                            ChunkPtr c = manager->findChunk( o );
                            log(4) << "  server:" << c->getShard().toString() << " " << o << endl;
                            insert( c->getShard() , r.getns() , o , flags);

                            r.gotInsert();
                            if ( r.getClientInfo()->autoSplitOk() )
                                c->splitIfShould( o.objsize() );
                            gotThrough = true;
                            break;
                        }
                        catch ( StaleConfigException& e ) {
                            int logLevel = i < ( maxTries / 2 );
                            LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl;
                            r.reset();
                            
                            unsigned long long old = manager->getSequenceNumber();
                            manager = r.getChunkManager();

                            if (!manager) {
                                keepGoing = false;
                                uasserted(14804, "collection no longer sharded");
                            }

                            LOG( logLevel ) << "  sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl;
                        }
                        sleepmillis( i * 20 );
                    }
                    
                    assert( inShutdown() || gotThrough ); // not caught below
                } catch (const UserException&){
                    if (!keepGoing || !d.moreJSObjs()){
                        throw;
                    }
                    // otherwise ignore and keep going
                }
            }
        }
Example #13
0
        virtual void queryOp( Request& r ){
            QueryMessage q( r.d() );

            log(3) << "shard query: " << q.ns << "  " << q.query << endl;
            
            if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") )
                throw UserException( 8010 , "something is wrong, shouldn't see a command here" );

            ChunkManagerPtr info = r.getChunkManager();
            assert( info );
            
            Query query( q.query );

            vector<shared_ptr<ChunkRange> > shards;
            info->getChunksForQuery( shards , query.getFilter()  );
            
            set<ServerAndQuery> servers;
            for ( vector<shared_ptr<ChunkRange> >::iterator i = shards.begin(); i != shards.end(); i++ ){
                shared_ptr<ChunkRange> c = *i;
                //servers.insert( ServerAndQuery( c->getShard().getConnString() , BSONObj() ) ); // ERH ERH ERH 
                servers.insert( ServerAndQuery( c->getShard().getConnString() , c->getFilter() ) );
            }
            
            if ( logLevel > 4 ){
                StringBuilder ss;
                ss << " shard query servers: " << servers.size() << '\n';
                for ( set<ServerAndQuery>::iterator i = servers.begin(); i!=servers.end(); i++ ){
                    const ServerAndQuery& s = *i;
                    ss << "       " << s.toString() << '\n';
                }
                log() << ss.str();
            }

            ClusteredCursor * cursor = 0;
            
            BSONObj sort = query.getSort();
            
            if ( sort.isEmpty() ){
                // 1. no sort, can just hit them in serial
                cursor = new SerialServerClusteredCursor( servers , q );
            }
            else {
                int shardKeyOrder = info->getShardKey().canOrder( sort );
                if ( shardKeyOrder ){
                    // 2. sort on shard key, can do in serial intelligently
                    set<ServerAndQuery> buckets;
                    for ( vector<shared_ptr<ChunkRange> >::iterator i = shards.begin(); i != shards.end(); i++ ){
                        shared_ptr<ChunkRange> s = *i;
                        buckets.insert( ServerAndQuery( s->getShard().getConnString() , s->getFilter() , s->getMin() ) );
                    }
                    cursor = new SerialServerClusteredCursor( buckets , q , shardKeyOrder );
                }
                else {
                    // 3. sort on non-sharded key, pull back a portion from each server and iterate slowly
                    cursor = new ParallelSortClusteredCursor( servers , q , sort );
                }
            }

            assert( cursor );
            
            log(5) << "   cursor type: " << cursor->type() << endl;
            shardedCursorTypes.hit( cursor->type() );
            
            if ( query.isExplain() ){
                BSONObj explain = cursor->explain();
                replyToQuery( 0 , r.p() , r.m() , explain );
                delete( cursor );
                return;
            }

            ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor ));
            if ( ! cc->sendNextBatch( r ) ){
                return;
            }
            log(6) << "storing cursor : " << cc->getId() << endl;
            cursorCache.store( cc );
        }
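
queryOp() above picks one of three cursor strategies, as its numbered comments describe. The decision itself reduces to the small function below; the names are illustrative only, not the mongos API.

    // Illustrative: the three routing strategies queryOp() chooses between.
    enum CursorStrategy {
        SerialNoSort,           // 1. no sort: query the shards one after another
        SerialByShardKey,       // 2. sort on the shard key: visit chunks in key order
        ParallelMergeSort       // 3. sort on another key: pull from all shards and merge
    };

    CursorStrategy chooseCursorStrategy(bool hasSort, bool sortMatchesShardKey) {
        if (!hasSort)
            return SerialNoSort;
        return sortMatchesShardKey ? SerialByShardKey : ParallelMergeSort;
    }
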
Example #14
0
        void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ){
            int flags = d.pullInt();
            
            BSONObj query = d.nextJsObj();
            uassert( 10201 ,  "invalid update" , d.moreJSObjs() );
            BSONObj toupdate = d.nextJsObj();

            BSONObj chunkFinder = query;
            
            bool upsert = flags & UpdateOption_Upsert;
            bool multi = flags & UpdateOption_Multi;

            if ( multi )
                uassert( 10202 ,  "can't mix multi and upsert and sharding" , ! upsert );

            if ( upsert && !(manager->hasShardKey(toupdate) ||
                             (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query))))
            {
                throw UserException( 8012 , "can't upsert something without shard key" );
            }

            bool save = false;
            if ( ! manager->hasShardKey( query ) ){
                if ( multi ){
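                    // nothing to check here: a multi update is allowed without the shard key
                    // and is routed to every matching chunk's shard below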
                }
                else if ( query.nFields() != 1 || strcmp( query.firstElement().fieldName() , "_id" ) ){
                    throw UserException( 8013 , "can't do update with query that doesn't have the shard key" );
                }
                else {
                    save = true;
                    chunkFinder = toupdate;
                }
            }

            
            if ( ! save ){
                if ( toupdate.firstElement().fieldName()[0] == '$' ){
                    BSONObjIterator ops(toupdate);
                    while(ops.more()){
                        BSONElement op(ops.next());
                        if (op.type() != Object)
                            continue;
                        BSONObjIterator fields(op.embeddedObject());
                        while(fields.more()){
                            const string field = fields.next().fieldName();
                            uassert(13123, "Can't modify shard key's value", ! manager->getShardKey().partOfShardKey(field));
                        }
                    }
                } else if ( manager->hasShardKey( toupdate ) ){
                    uassert( 8014, "change would move shards!", manager->getShardKey().compare( query , toupdate ) == 0 );
                } else {
                    uasserted(12376, "shard key must be in update object");
                }
            }
            
            if ( multi ){
                vector<shared_ptr<ChunkRange> > chunks;
                manager->getChunksForQuery( chunks , chunkFinder );
                set<Shard> seen;
                for ( vector<shared_ptr<ChunkRange> >::iterator i=chunks.begin(); i!=chunks.end(); i++){
                    shared_ptr<ChunkRange> c = *i;
                    if ( seen.count( c->getShard() ) )
                        continue;
                    doWrite( dbUpdate , r , c->getShard() );
                    seen.insert( c->getShard() );
                }
            }
            else {
                ChunkPtr c = manager->findChunk( chunkFinder );
                doWrite( dbUpdate , r , c->getShard() );
                c->splitIfShould( d.msg().header()->dataLen() );
            }

        }