/**
 * Decides whether a chunk of collection 'ns' should be migrated and, if so,
 * which chunk and between which pair of shards.
 *
 * @param ns                the collection's namespace
 * @param shardToLimitsMap  per-shard limit info (size cap, draining flag,
 *                          pending writeback operations)
 * @param shardToChunksMap  the chunks currently resident on each shard
 * @param balancedLastTime  non-zero if the previous round moved a chunk
 * @return a heap-allocated ChunkInfo describing the migration, or NULL when
 *         no migration should happen this round; the caller owns the pointer
 */
BalancerPolicy::ChunkInfo* BalancerPolicy::balance( const string& ns,
                                                    const ShardToLimitsMap& shardToLimitsMap,
                                                    const ShardToChunksMap& shardToChunksMap,
                                                    int balancedLastTime ) {
    // 'min' tracks the best receiver (fewest chunks), 'max' the best donor
    // (most chunks), each as (shard name, chunk count).
    pair<string,unsigned> min("",numeric_limits<unsigned>::max());
    pair<string,unsigned> max("",0);
    vector<string> drainingShards;
    bool maxOpsQueued = false;   // does the current 'max' shard have pending writebacks?

    for (ShardToChunksIter i = shardToChunksMap.begin(); i!=shardToChunksMap.end(); ++i ) {

        // Find whether this shard's capacity or availability are exhausted
        const string& shard = i->first;
        BSONObj shardLimits;
        ShardToLimitsIter it = shardToLimitsMap.find( shard );
        if ( it != shardToLimitsMap.end() ) shardLimits = it->second;
        const bool maxedOut = isSizeMaxed( shardLimits );
        const bool draining = isDraining( shardLimits );
        const bool opsQueued = hasOpsQueued( shardLimits );

        // Is this shard a better chunk receiver then the current one?
        // Shards that would be bad receiver candidates:
        // + maxed out shards
        // + draining shards
        // + shards with operations queued for writeback
        const unsigned size = i->second.size();
        if ( ! maxedOut && ! draining && ! opsQueued ) {
            if ( size < min.second ) {
                min = make_pair( shard , size );
            }
        }
        else if ( opsQueued ) {
            MONGO_LOG(1) << "won't send a chunk to: " << shard << " because it has ops queued" << endl;
        }
        else if ( maxedOut ) {
            MONGO_LOG(1) << "won't send a chunk to: " << shard << " because it is maxedOut" << endl;
        }

        // Check whether this shard is a better chunk donor then the current one.
        // Draining shards take a lower priority than overloaded shards.
        if ( size > max.second ) {
            max = make_pair( shard , size );
            maxOpsQueued = opsQueued;
        }
        // Only a draining shard that still holds chunks needs attention.
        if ( draining && (size > 0)) {
            drainingShards.push_back( shard );
        }
    }

    // If there is no candidate chunk receiver -- they may have all been maxed out,
    // draining, ... -- there's not much that the policy can do.
    if ( min.second == numeric_limits<unsigned>::max() ) {
        log() << "no available shards to take chunks" << endl;
        return NULL;
    }

    // Refuse to drain the biggest shard while it still has writebacks queued;
    // wait for those to complete before migrating off of it.
    if ( maxOpsQueued ) {
        log() << "biggest shard " << max.first << " has unprocessed writebacks, waiting for completion of migrate" << endl;
        return NULL;
    }

    MONGO_LOG(1) << "collection : " << ns << endl;
    MONGO_LOG(1) << "donor : " << max.second << " chunks on " << max.first << endl;
    MONGO_LOG(1) << "receiver : " << min.second << " chunks on " << min.first << endl;
    if ( ! drainingShards.empty() ) {
        string drainingStr;
        joinStringDelim( drainingShards, &drainingStr, ',' );
        // BUGFIX: previously this logged "! drainingShards.empty()" (always 1)
        // and discarded the joined list that was just built; log the list itself.
        MONGO_LOG(1) << "draining : " << drainingStr << "(" << drainingShards.size() << ")" << endl;
    }

    // Solving imbalances takes a higher priority than draining shards. Many shards can
    // be draining at once but we choose only one of them to cater to per round.
    // Important to start balanced, so when there are few chunks any imbalance must be fixed.
    const int imbalance = max.second - min.second;
    int threshold = 8;
    if (balancedLastTime || max.second < 20) threshold = 2;
    else if (max.second < 80) threshold = 4;
    string from, to;
    if ( imbalance >= threshold ) {
        from = max.first;
        to = min.first;
    }
    else if ( ! drainingShards.empty() ) {
        // No imbalance to fix: pick a random draining shard to empty out.
        from = drainingShards[ rand() % drainingShards.size() ];
        to = min.first;
    }
    else {
        // Everything is balanced here!
        return NULL;
    }

    const vector<BSONObj>& chunksFrom = shardToChunksMap.find( from )->second;
    const vector<BSONObj>& chunksTo = shardToChunksMap.find( to )->second;
    BSONObj chunkToMove = pickChunk( chunksFrom , chunksTo );
    log() << "chose [" << from << "] to [" << to << "] " << chunkToMove << endl;

    return new ChunkInfo( ns, to, from, chunkToMove );
}
/**
 * Decides whether a chunk of collection 'ns' should be migrated and, if so,
 * which chunk and between which pair of shards.
 *
 * @param ns                the collection's namespace
 * @param shardToLimitsMap  per-shard limit info (size cap, draining flag)
 * @param shardToChunksMap  the chunks currently resident on each shard
 * @param balancedLastTime  non-zero if the previous round moved a chunk
 * @return a heap-allocated ChunkInfo describing the migration, or NULL when
 *         no migration should happen this round; the caller owns the pointer
 */
BalancerPolicy::ChunkInfo* BalancerPolicy::balance( const string& ns,
                                                    const ShardToLimitsMap& shardToLimitsMap,
                                                    const ShardToChunksMap& shardToChunksMap,
                                                    int balancedLastTime ){
    // 'min' tracks the best receiver (fewest chunks), 'max' the best donor
    // (most chunks), each as (shard name, chunk count).
    pair<string,unsigned> min("",numeric_limits<unsigned>::max());
    pair<string,unsigned> max("",0);
    vector<string> drainingShards;

    for (ShardToChunksIter i = shardToChunksMap.begin(); i!=shardToChunksMap.end(); ++i ){

        // Find whether this shard has reached its size cap or whether it is being removed.
        const string& shard = i->first;
        BSONObj shardLimits;
        ShardToLimitsIter it = shardToLimitsMap.find( shard );
        if ( it != shardToLimitsMap.end() ) shardLimits = it->second;
        const bool maxedOut = isSizeMaxed( shardLimits );
        const bool draining = isDraining( shardLimits );

        // Check whether this shard is a better chunk receiver then the current one.
        // Maxed out shards or draining shards cannot be considered receivers.
        const unsigned size = i->second.size();
        if ( ! maxedOut && ! draining ){
            if ( size < min.second ){
                min = make_pair( shard , size );
            }
        }

        // Check whether this shard is a better chunk donor then the current one.
        // Draining shards take a lower priority than overloaded shards.
        if ( size > max.second ){
            max = make_pair( shard , size );
        }
        // Only a draining shard that still holds chunks needs attention.
        if ( draining && (size > 0)){
            drainingShards.push_back( shard );
        }
    }

    // If there is no candidate chunk receiver -- they may have all been maxed out,
    // draining, ... -- there's not much that the policy can do.
    if ( min.second == numeric_limits<unsigned>::max() ){
        // BUGFIX: corrected typo "availalable" in the log message.
        log() << "no available shards to take chunks" << endl;
        return NULL;
    }

    log(1) << "collection : " << ns << endl;
    log(1) << "donor : " << max.second << " chunks on " << max.first << endl;
    log(1) << "receiver : " << min.second << " chunks on " << min.first << endl;
    if ( ! drainingShards.empty() ){
        string drainingStr;
        joinStringDelim( drainingShards, &drainingStr, ',' );
        // BUGFIX: previously this logged "! drainingShards.empty()" (always 1)
        // and discarded the joined list that was just built; log the list itself.
        log(1) << "draining : " << drainingStr << "(" << drainingShards.size() << ")" << endl;
    }

    // Solving imbalances takes a higher priority than draining shards. Many shards can
    // be draining at once but we choose only one of them to cater to per round.
    const int imbalance = max.second - min.second;
    // Require a larger gap right after a quiet round to avoid thrashing.
    const int threshold = balancedLastTime ? 2 : 8;
    string from, to;
    if ( imbalance >= threshold ){
        from = max.first;
        to = min.first;
    }
    else if ( ! drainingShards.empty() ){
        // No imbalance to fix: pick a random draining shard to empty out.
        from = drainingShards[ rand() % drainingShards.size() ];
        to = min.first;
    }
    else {
        // Everything is balanced here!
        return NULL;
    }

    const vector<BSONObj>& chunksFrom = shardToChunksMap.find( from )->second;
    const vector<BSONObj>& chunksTo = shardToChunksMap.find( to )->second;
    BSONObj chunkToMove = pickChunk( chunksFrom , chunksTo );
    log() << "chose [" << from << "] to [" << to << "] " << chunkToMove << endl;

    return new ChunkInfo( ns, to, from, chunkToMove );
}