Exemplo n.º 1
0
void Balancer::run() {
    Client::initThread("Balancer");

    // This is the body of a BackgroundJob so if we throw here we're basically ending the balancer
    // thread prematurely.
    while (!inShutdown()) {
        auto txn = cc().makeOperationContext();
        if (!_init(txn.get())) {
            log() << "will retry to initialize balancer in one minute";
            sleepsecs(60);
            continue;
        }

        break;
    }

    Seconds balanceRoundInterval(kBalanceRoundDefaultInterval);

    while (!inShutdown()) {
        auto txn = cc().makeOperationContext();

        BalanceRoundDetails roundDetails;

        try {
            // ping has to be first so we keep things in the config server in sync
            _ping(txn.get(), false);

            MONGO_FAIL_POINT_BLOCK(balancerRoundIntervalSetting, scopedBalancerRoundInterval) {
                const BSONObj& data = scopedBalancerRoundInterval.getData();
                balanceRoundInterval = Seconds(data["sleepSecs"].numberInt());
            }

            // Use fresh shard state and balancer settings
            Grid::get(txn.get())->shardRegistry()->reload(txn.get());

            auto balancerConfig = Grid::get(txn.get())->getBalancerConfiguration();
            Status refreshStatus = balancerConfig->refreshAndCheck(txn.get());
            if (!refreshStatus.isOK()) {
                warning() << "Skipping balancing round" << causedBy(refreshStatus);
                sleepFor(balanceRoundInterval);
                continue;
            }

            // now make sure we should even be running
            if (!balancerConfig->isBalancerActive() || MONGO_FAIL_POINT(skipBalanceRound)) {
                LOG(1) << "skipping balancing round because balancing is disabled";

                // Ping again so scripts can determine if we're active without waiting
                _ping(txn.get(), true);

                sleepFor(balanceRoundInterval);
                continue;
            }

            uassert(13258, "oids broken after resetting!", _checkOIDs(txn.get()));

            {
                auto scopedDistLock = grid.catalogManager(txn.get())
                                          ->distLock(txn.get(),
                                                     "balancer",
                                                     "doing balance round",
                                                     DistLockManager::kSingleLockAttemptTimeout);

                if (!scopedDistLock.isOK()) {
                    LOG(1) << "skipping balancing round" << causedBy(scopedDistLock.getStatus());

                    // Ping again so scripts can determine if we're active without waiting
                    _ping(txn.get(), true);

                    sleepFor(balanceRoundInterval);  // no need to wake up soon
                    continue;
                }

                LOG(1) << "*** start balancing round. "
                       << "waitForDelete: " << balancerConfig->waitForDelete()
                       << ", secondaryThrottle: "
                       << balancerConfig->getSecondaryThrottle().toBSON();

                OCCASIONALLY warnOnMultiVersion(
                    uassertStatusOK(_clusterStats->getStats(txn.get())));

                Status status = _enforceTagRanges(txn.get());
                if (!status.isOK()) {
                    warning() << "Failed to enforce tag ranges" << causedBy(status);
                } else {
                    LOG(1) << "Done enforcing tag range boundaries.";
                }

                const auto candidateChunks = uassertStatusOK(
                    _chunkSelectionPolicy->selectChunksToMove(txn.get(), _balancedLastTime));

                if (candidateChunks.empty()) {
                    LOG(1) << "no need to move any chunk";
                    _balancedLastTime = 0;
                } else {
                    _balancedLastTime = _moveChunks(txn.get(),
                                                    candidateChunks,
                                                    balancerConfig->getSecondaryThrottle(),
                                                    balancerConfig->waitForDelete());

                    roundDetails.setSucceeded(static_cast<int>(candidateChunks.size()),
                                              _balancedLastTime);

                    grid.catalogManager(txn.get())
                        ->logAction(txn.get(), "balancer.round", "", roundDetails.toBSON());
                }

                LOG(1) << "*** End of balancing round";
            }

            // Ping again so scripts can determine if we're active without waiting
            _ping(txn.get(), true);

            sleepFor(_balancedLastTime ? kShortBalanceRoundInterval : balanceRoundInterval);
        } catch (const std::exception& e) {
            log() << "caught exception while doing balance: " << e.what();

            // Just to match the opening statement if in log level 1
            LOG(1) << "*** End of balancing round";

            // This round failed, tell the world!
            roundDetails.setFailed(e.what());

            grid.catalogManager(txn.get())
                ->logAction(txn.get(), "balancer.round", "", roundDetails.toBSON());

            // Sleep a fair amount before retrying because of the error
            sleepFor(balanceRoundInterval);
        }
    }
}
Exemplo n.º 2
0
StatusWith<vector<MigrateInfo>> Balancer::_getCandidateChunks(OperationContext* txn) {
    vector<CollectionType> collections;

    Status collsStatus =
        grid.catalogManager(txn)->getCollections(txn, nullptr, &collections, nullptr);
    if (!collsStatus.isOK()) {
        return collsStatus;
    }

    if (collections.empty()) {
        return vector<MigrateInfo>();
    }

    // Get a list of all the shards that are participating in this balance round along with any
    // maximum allowed quotas and current utilization. We get the latter by issuing
    // db.serverStatus() (mem.mapped) to all shards.
    //
    // TODO: skip unresponsive shards and mark information as stale.
    auto shardInfoStatus = DistributionStatus::populateShardInfoMap(txn);
    if (!shardInfoStatus.isOK()) {
        return shardInfoStatus.getStatus();
    }

    const ShardInfoMap shardInfo(std::move(shardInfoStatus.getValue()));

    if (shardInfo.size() < 2) {
        return vector<MigrateInfo>();
    }

    OCCASIONALLY warnOnMultiVersion(shardInfo);

    std::vector<MigrateInfo> candidateChunks;

    // For each collection, check if the balancing policy recommends moving anything around.
    for (const auto& coll : collections) {
        // Skip collections for which balancing is disabled
        const NamespaceString& nss = coll.getNs();

        if (!coll.getAllowBalance()) {
            LOG(1) << "Not balancing collection " << nss << "; explicitly disabled.";
            continue;
        }

        std::vector<ChunkType> allNsChunks;
        Status status = grid.catalogManager(txn)->getChunks(txn,
                                                            BSON(ChunkType::ns(nss.ns())),
                                                            BSON(ChunkType::min() << 1),
                                                            boost::none,  // all chunks
                                                            &allNsChunks,
                                                            nullptr);
        if (!status.isOK()) {
            warning() << "failed to load chunks for ns " << nss.ns() << causedBy(status);
            continue;
        }

        set<BSONObj> allChunkMinimums;
        map<string, vector<ChunkType>> shardToChunksMap;

        for (const ChunkType& chunk : allNsChunks) {
            allChunkMinimums.insert(chunk.getMin().getOwned());

            vector<ChunkType>& chunksList = shardToChunksMap[chunk.getShard()];
            chunksList.push_back(chunk);
        }

        if (shardToChunksMap.empty()) {
            LOG(1) << "skipping empty collection (" << nss.ns() << ")";
            continue;
        }

        for (ShardInfoMap::const_iterator i = shardInfo.begin(); i != shardInfo.end(); ++i) {
            // This loop just makes sure there is an entry in shardToChunksMap for every shard
            shardToChunksMap[i->first];
        }

        DistributionStatus distStatus(shardInfo, shardToChunksMap);

        // TODO: TagRange contains all the information from TagsType except for the namespace,
        //       so maybe the two can be merged at some point in order to avoid the
        //       transformation below.
        vector<TagRange> ranges;

        {
            vector<TagsType> collectionTags;
            uassertStatusOK(
                grid.catalogManager(txn)->getTagsForCollection(txn, nss.ns(), &collectionTags));
            for (const auto& tt : collectionTags) {
                ranges.push_back(
                    TagRange(tt.getMinKey().getOwned(), tt.getMaxKey().getOwned(), tt.getTag()));
                uassert(16356,
                        str::stream() << "tag ranges not valid for: " << nss.ns(),
                        distStatus.addTagRange(ranges.back()));
            }
        }

        auto statusGetDb = grid.catalogCache()->getDatabase(txn, nss.db().toString());
        if (!statusGetDb.isOK()) {
            warning() << "could not load db config to balance collection [" << nss.ns()
                      << "]: " << statusGetDb.getStatus();
            continue;
        }

        shared_ptr<DBConfig> cfg = statusGetDb.getValue();

        // This line reloads the chunk manager once if this process doesn't know the collection
        // is sharded yet.
        shared_ptr<ChunkManager> cm = cfg->getChunkManagerIfExists(txn, nss.ns(), true);
        if (!cm) {
            warning() << "could not load chunks to balance " << nss.ns() << " collection";
            continue;
        }

        // Loop through tags to make sure no chunk spans tags. Split on tag min for all chunks.
        bool didAnySplits = false;

        for (const TagRange& range : ranges) {
            BSONObj min =
                cm->getShardKeyPattern().getKeyPattern().extendRangeBound(range.min, false);

            if (allChunkMinimums.count(min) > 0) {
                continue;
            }

            didAnySplits = true;

            log() << "nss: " << nss.ns() << " need to split on " << min
                  << " because there is a range there";

            vector<BSONObj> splitPoints;
            splitPoints.push_back(min);

            shared_ptr<Chunk> c = cm->findIntersectingChunk(txn, min);
            Status status = c->multiSplit(txn, splitPoints, NULL);
            if (!status.isOK()) {
                error() << "split failed: " << status;
            } else {
                LOG(1) << "split worked";
            }

            break;
        }

        if (didAnySplits) {
            // State change, just wait till next round
            continue;
        }

        shared_ptr<MigrateInfo> migrateInfo(
            BalancerPolicy::balance(nss.ns(), distStatus, _balancedLastTime));
        if (migrateInfo) {
            candidateChunks.emplace_back(*migrateInfo);
        }
    }

    return candidateChunks;
}
Exemplo n.º 3
0
    void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
        verify( candidateChunks );

        //
        // 1. Check whether there is any sharded collection to be balanced by querying
        // the ShardsNS::collections collection
        //

        auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj());

        if ( NULL == cursor.get() ) {
            warning() << "could not query " << CollectionType::ConfigNS
                      << " while trying to balance" << endl;
            return;
        }

        vector< string > collections;
        while ( cursor->more() ) {
            BSONObj col = cursor->nextSafe();

            // sharded collections will have a shard "key".
            if ( ! col[CollectionType::keyPattern()].eoo() &&
                 ! col[CollectionType::noBalance()].trueValue() ){
                collections.push_back( col[CollectionType::ns()].String() );
            }
            else if( col[CollectionType::noBalance()].trueValue() ){
                LOG(1) << "not balancing collection " << col[CollectionType::ns()].String()
                       << ", explicitly disabled" << endl;
            }

        }
        cursor.reset();

        if ( collections.empty() ) {
            LOG(1) << "no collections to balance" << endl;
            return;
        }

        //
        // 2. Get a list of all the shards that are participating in this balance round
        // along with any maximum allowed quotas and current utilization. We get the
        // latter by issuing db.serverStatus() (mem.mapped) to all shards.
        //
        // TODO: skip unresponsive shards and mark information as stale.
        //

        ShardInfoMap shardInfo;
        Status loadStatus = DistributionStatus::populateShardInfoMap(&shardInfo);

        if (!loadStatus.isOK()) {
            warning() << "failed to load shard metadata" << causedBy(loadStatus) << endl;
            return;
        }

        if (shardInfo.size() < 2) {
            LOG(1) << "can't balance without more active shards" << endl;
            return;
        }

        OCCASIONALLY warnOnMultiVersion( shardInfo );

        //
        // 3. For each collection, check if the balancing policy recommends moving anything around.
        //

        for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
            const string& ns = *it;

            OwnedPointerMap<string, OwnedPointerVector<ChunkType> > shardToChunksMap;
            cursor = conn.query(ChunkType::ConfigNS,
                                QUERY(ChunkType::ns(ns)).sort(ChunkType::min()));

            set<BSONObj> allChunkMinimums;

            while ( cursor->more() ) {
                BSONObj chunkDoc = cursor->nextSafe().getOwned();

                auto_ptr<ChunkType> chunk(new ChunkType());
                string errmsg;
                if (!chunk->parseBSON(chunkDoc, &errmsg)) {
                    error() << "bad chunk format for " << chunkDoc
                            << ": " << errmsg << endl;
                    return;
                }

                allChunkMinimums.insert(chunk->getMin().getOwned());
                OwnedPointerVector<ChunkType>*& chunkList =
                        shardToChunksMap.mutableMap()[chunk->getShard()];

                if (chunkList == NULL) {
                    chunkList = new OwnedPointerVector<ChunkType>();
                }

                chunkList->mutableVector().push_back(chunk.release());
            }
            cursor.reset();

            if (shardToChunksMap.map().empty()) {
                LOG(1) << "skipping empty collection (" << ns << ")";
                continue;
            }

            for (ShardInfoMap::const_iterator i = shardInfo.begin(); i != shardInfo.end(); ++i) {
                // this just makes sure there is an entry in shardToChunksMap for every shard
                OwnedPointerVector<ChunkType>*& chunkList =
                        shardToChunksMap.mutableMap()[i->first];

                if (chunkList == NULL) {
                    chunkList = new OwnedPointerVector<ChunkType>();
                }
            }

            DistributionStatus status(shardInfo, shardToChunksMap.map());

            // load tags
            Status result = clusterCreateIndex(TagsType::ConfigNS,
                                               BSON(TagsType::ns() << 1 << TagsType::min() << 1),
                                               true, // unique
                                               WriteConcernOptions::AllConfigs,
                                               NULL);

            if ( !result.isOK() ) {
                warning() << "could not create index tags_1_min_1: " << result.reason() << endl;
                continue;
            }

            cursor = conn.query(TagsType::ConfigNS,
                                QUERY(TagsType::ns(ns)).sort(TagsType::min()));

            vector<TagRange> ranges;

            while ( cursor->more() ) {
                BSONObj tag = cursor->nextSafe();
                TagRange tr(tag[TagsType::min()].Obj().getOwned(),
                            tag[TagsType::max()].Obj().getOwned(),
                            tag[TagsType::tag()].String());
                ranges.push_back(tr);
                uassert(16356,
                        str::stream() << "tag ranges not valid for: " << ns,
                        status.addTagRange(tr) );

            }
            cursor.reset();

            DBConfigPtr cfg = grid.getDBConfig( ns );
            if ( !cfg ) {
                warning() << "could not load db config to balance " << ns << " collection" << endl;
                continue;
            }

            // This line reloads the chunk manager once if this process doesn't know the collection
            // is sharded yet.
            ChunkManagerPtr cm = cfg->getChunkManagerIfExists( ns, true );
            if ( !cm ) {
                warning() << "could not load chunks to balance " << ns << " collection" << endl;
                continue;
            }

            // loop through tags to make sure no chunk spans tags; splits on tag min. for all chunks
            bool didAnySplits = false;
            for ( unsigned i = 0; i < ranges.size(); i++ ) {
                BSONObj min = ranges[i].min;

                min = cm->getShardKey().extendRangeBound( min, false );

                if ( allChunkMinimums.count( min ) > 0 )
                    continue;

                didAnySplits = true;

                log() << "ns: " << ns << " need to split on "
                      << min << " because there is a range there" << endl;

                ChunkPtr c = cm->findIntersectingChunk( min );

                vector<BSONObj> splitPoints;
                splitPoints.push_back( min );

                BSONObj res;
                if ( !c->multiSplit( splitPoints, res ) ) {
                    error() << "split failed: " << res << endl;
                }
                else {
                    LOG(1) << "split worked: " << res << endl;
                }
                break;
            }

            if ( didAnySplits ) {
                // state change, just wait till next round
                continue;
            }

            CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime );
            if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
        }
    }
Exemplo n.º 4
0
    void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
        verify( candidateChunks );

        //
        // 1. Check whether there is any sharded collection to be balanced by querying
        // the ShardsNS::collections collection
        //

        auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj());
        vector< string > collections;
        while ( cursor->more() ) {
            BSONObj col = cursor->nextSafe();

            // sharded collections will have a shard "key".
            if ( ! col[CollectionType::keyPattern()].eoo() &&
                 ! col[CollectionType::noBalance()].trueValue() ){
                collections.push_back( col[CollectionType::ns()].String() );
            }
            else if( col[CollectionType::noBalance()].trueValue() ){
                LOG(1) << "not balancing collection " << col[CollectionType::ns()].String()
                       << ", explicitly disabled" << endl;
            }

        }
        cursor.reset();

        if ( collections.empty() ) {
            LOG(1) << "no collections to balance" << endl;
            return;
        }

        //
        // 2. Get a list of all the shards that are participating in this balance round
        // along with any maximum allowed quotas and current utilization. We get the
        // latter by issuing db.serverStatus() (mem.mapped) to all shards.
        //
        // TODO: skip unresponsive shards and mark information as stale.
        //

        vector<Shard> allShards;
        Shard::getAllShards( allShards );
        if ( allShards.size() < 2) {
            LOG(1) << "can't balance without more active shards" << endl;
            return;
        }
        
        ShardInfoMap shardInfo;
        for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) {
            const Shard& s = *it;
            ShardStatus status = s.getStatus();
            shardInfo[ s.getName() ] = ShardInfo( s.getMaxSize(),
                                                  status.mapped(),
                                                  s.isDraining(),
                                                  status.hasOpsQueued(),
                                                  s.tags(),
                                                  status.mongoVersion()
                                                  );
        }

        OCCASIONALLY warnOnMultiVersion( shardInfo );

        //
        // 3. For each collection, check if the balancing policy recommends moving anything around.
        //

        for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
            const string& ns = *it;

            map< string,vector<BSONObj> > shardToChunksMap;
            cursor = conn.query(ChunkType::ConfigNS,
                                QUERY(ChunkType::ns(ns)).sort(ChunkType::min()));

            set<BSONObj> allChunkMinimums;

            while ( cursor->more() ) {
                BSONObj chunk = cursor->nextSafe().getOwned();
                vector<BSONObj>& chunks = shardToChunksMap[chunk[ChunkType::shard()].String()];
                allChunkMinimums.insert( chunk[ChunkType::min()].Obj() );
                chunks.push_back( chunk );
            }
            cursor.reset();

            if (shardToChunksMap.empty()) {
                LOG(1) << "skipping empty collection (" << ns << ")";
                continue;
            }

            for ( vector<Shard>::iterator i=allShards.begin(); i!=allShards.end(); ++i ) {
                // this just makes sure there is an entry in shardToChunksMap for every shard
                Shard s = *i;
                shardToChunksMap[s.getName()].size();
            }

            DistributionStatus status( shardInfo, shardToChunksMap );

            // load tags
            conn.ensureIndex(TagsType::ConfigNS,
                             BSON(TagsType::ns() << 1 << TagsType::min() << 1),
                             true);

            cursor = conn.query(TagsType::ConfigNS,
                                QUERY(TagsType::ns(ns)).sort(TagsType::min()));

            vector<TagRange> ranges;

            while ( cursor->more() ) {
                BSONObj tag = cursor->nextSafe();
                TagRange tr(tag[TagsType::min()].Obj().getOwned(),
                            tag[TagsType::max()].Obj().getOwned(),
                            tag[TagsType::tag()].String());
                ranges.push_back(tr);
                uassert(16356,
                        str::stream() << "tag ranges not valid for: " << ns,
                        status.addTagRange(tr) );

            }
            cursor.reset();

            DBConfigPtr cfg = grid.getDBConfig( ns );
            if ( !cfg ) {
                warning() << "could not load db config to balance " << ns << " collection" << endl;
                continue;
            }

            // This line reloads the chunk manager once if this process doesn't know the collection
            // is sharded yet.
            ChunkManagerPtr cm = cfg->getChunkManagerIfExists( ns, true );
            if ( !cm ) {
                warning() << "could not load chunks to balance " << ns << " collection" << endl;
                continue;
            }

            // loop through tags to make sure no chunk spans tags; splits on tag min. for all chunks
            bool didAnySplits = false;
            for ( unsigned i = 0; i < ranges.size(); i++ ) {
                BSONObj min = ranges[i].min;

                min = cm->getShardKey().extendRangeBound( min, false );

                if ( allChunkMinimums.count( min ) > 0 )
                    continue;

                didAnySplits = true;

                log() << "ns: " << ns << " need to split on "
                      << min << " because there is a range there" << endl;

                ChunkPtr c = cm->findIntersectingChunk( min );

                vector<BSONObj> splitPoints;
                splitPoints.push_back( min );

                BSONObj res;
                if ( !c->multiSplit( splitPoints, res ) ) {
                    error() << "split failed: " << res << endl;
                }
                else {
                    LOG(1) << "split worked: " << res << endl;
                }
                break;
            }

            if ( didAnySplits ) {
                // state change, just wait till next round
                continue;
            }

            CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime );
            if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
        }
    }