Shard Shard::pick( const Shard& current ) {
    vector<Shard> all;
    staticShardInfo.getAllShards( all );
    if ( all.size() == 0 ) {
        staticShardInfo.reload();
        staticShardInfo.getAllShards( all );
        if ( all.size() == 0 )
            return EMPTY;
    }

    // if current shard was provided, pick a different shard only if it is a better choice
    ShardStatus best = all[0].getStatus();
    if ( current != EMPTY ) {
        best = current.getStatus();
    }

    for ( size_t i = 0; i < all.size(); i++ ) {
        ShardStatus t = all[i].getStatus();
        if ( t < best )
            best = t;
    }

    LOG(1) << "best shard for new allocation is " << best << endl;
    return best.shard();
}
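// Illustrative caller (hypothetical, not from the source tree): because pick()
// seeds `best` with the current shard's status, it returns `current` unless
// some other shard's status compares strictly lower, so ties never cause the
// allocation to move. `chooseShardForNewAllocation` is an assumed name.
Shard chooseShardForNewAllocation( const Shard& currentPrimary ) {
    Shard picked = Shard::pick( currentPrimary );
    if ( picked != currentPrimary ) {
        // a strictly less-loaded shard exists; the new allocation goes there
    }
    return picked;
}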
void DistributionStatus::populateShardInfoMap( const vector<Shard>& allShards,
                                               ShardInfoMap* shardInfo ) {
    for ( vector<Shard>::const_iterator it = allShards.begin();
          it != allShards.end();
          ++it ) {
        const Shard& shard = *it;
        ShardStatus status = shard.getStatus();
        shardInfo->insert( make_pair( shard.getName(),
                                      ShardInfo( shard.getMaxSize(),
                                                 status.mapped(),
                                                 shard.isDraining(),
                                                 status.hasOpsQueued(),
                                                 shard.tags(),
                                                 status.mongoVersion() ) ) );
    }
}
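// Sketch (assumption: mirrors how _doBalanceRound builds its ShardInfoMap
// below; allShards and shardToChunksMap are stand-in variables, not code from
// this excerpt). DistributionStatus pairs per-shard limits and utilization
// with the current chunk layout so the policy can score candidate moves.
vector<Shard> allShards;
Shard::getAllShards( allShards );

ShardInfoMap shardInfo;
DistributionStatus::populateShardInfoMap( allShards, &shardInfo );

map< string, vector<BSONObj> > shardToChunksMap; // filled from config.chunks
DistributionStatus distStatus( shardInfo, shardToChunksMap );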
Shard Shard::pick() {
    vector<Shard> all;
    staticShardInfo.getAllShards( all );
    if ( all.size() == 0 ) {
        staticShardInfo.reload();
        staticShardInfo.getAllShards( all );
        if ( all.size() == 0 )
            return EMPTY;
    }

    ShardStatus best = all[0].getStatus();

    for ( size_t i = 1; i < all.size(); i++ ) {
        ShardStatus t = all[i].getStatus();
        if ( t < best )
            best = t;
    }

    log(1) << "picking shard: " << best << endl;
    return best.shard();
}
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
    verify( candidateChunks );

    //
    // 1. Check whether there is any sharded collection to be balanced by querying
    // the ShardsNS::collections collection
    //

    auto_ptr<DBClientCursor> cursor = conn.query( ShardNS::collection , BSONObj() );
    vector< string > collections;
    while ( cursor->more() ) {
        BSONObj col = cursor->nextSafe();

        // sharded collections will have a shard "key".
        if ( ! col["key"].eoo() && ! col["noBalance"].trueValue() ){
            collections.push_back( col["_id"].String() );
        }
        else if( col["noBalance"].trueValue() ){
            LOG(1) << "not balancing collection " << col["_id"].String()
                   << ", explicitly disabled" << endl;
        }
    }
    cursor.reset();

    if ( collections.empty() ) {
        LOG(1) << "no collections to balance" << endl;
        return;
    }

    //
    // 2. Get a list of all the shards that are participating in this balance round
    // along with any maximum allowed quotas and current utilization. We get the
    // latter by issuing db.serverStatus() (mem.mapped) to all shards.
    //
    // TODO: skip unresponsive shards and mark information as stale.
    //

    vector<Shard> allShards;
    Shard::getAllShards( allShards );
    if ( allShards.size() < 2 ) {
        LOG(1) << "can't balance without more active shards" << endl;
        return;
    }

    ShardInfoMap shardInfo;
    for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) {
        const Shard& s = *it;
        ShardStatus status = s.getStatus();
        shardInfo[ s.getName() ] = ShardInfo( s.getMaxSize(),
                                              status.mapped(),
                                              s.isDraining(),
                                              status.hasOpsQueued(),
                                              s.tags() );
    }

    //
    // 3. For each collection, check if the balancing policy recommends moving anything around.
    //

    for ( vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
        const string& ns = *it;

        map< string,vector<BSONObj> > shardToChunksMap;
        cursor = conn.query( ShardNS::chunk , QUERY( "ns" << ns ).sort( "min" ) );
        while ( cursor->more() ) {
            BSONObj chunk = cursor->nextSafe();
            if ( chunk["jumbo"].trueValue() )
                continue;
            vector<BSONObj>& chunks = shardToChunksMap[chunk["shard"].String()];
            chunks.push_back( chunk.getOwned() );
        }
        cursor.reset();

        if ( shardToChunksMap.empty() ) {
            LOG(1) << "skipping empty collection (" << ns << ")";
            continue;
        }

        for ( vector<Shard>::iterator i = allShards.begin(); i != allShards.end(); ++i ) {
            // this just makes sure there is an entry in shardToChunksMap for every shard
            Shard s = *i;
            shardToChunksMap[s.getName()].size();
        }

        DistributionStatus status( shardInfo, shardToChunksMap );

        // load tags
        conn.ensureIndex( ShardNS::tags, BSON( "ns" << 1 << "min" << 1 ), true );
        cursor = conn.query( ShardNS::tags , QUERY( "ns" << ns ).sort( "min" ) );
        while ( cursor->more() ) {
            BSONObj tag = cursor->nextSafe();
            uassert( 16356 ,
                     str::stream() << "tag ranges not valid for: " << ns ,
                     status.addTagRange( TagRange( tag["min"].Obj().getOwned(),
                                                   tag["max"].Obj().getOwned(),
                                                   tag["tag"].String() ) ) );
        }
        cursor.reset();

        CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime );
        if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
    }
}
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
    assert( candidateChunks );

    //
    // 1. Check whether there is any sharded collection to be balanced by querying
    // the ShardsNS::database collection
    //
    // { "_id" : "test", "partitioned" : true, "primary" : "shard0",
    //   "sharded" : {
    //       "test.images" : { "key" : { "_id" : 1 }, "unique" : false },
    //       ...
    //   }
    // }
    //

    auto_ptr<DBClientCursor> cursor = conn.query( ShardNS::database , BSON( "partitioned" << true ) );
    vector< string > collections;
    while ( cursor->more() ) {
        BSONObj db = cursor->next();

        // A database may be partitioned but not yet have a sharded collection.
        // 'cursor' will point to docs that do not contain the "sharded" key. Since
        // there'd be nothing to balance, we want to skip those here.
        BSONElement shardedColls = db["sharded"];
        if ( shardedColls.eoo() ) {
            log(2) << "balancer: skipping database with no sharded collection ("
                   << db["_id"].str() << ")" << endl;
            continue;
        }

        BSONObjIterator i( shardedColls.Obj() );
        while ( i.more() ) {
            BSONElement e = i.next();
            collections.push_back( e.fieldName() );
        }
    }
    cursor.reset();

    if ( collections.empty() ) {
        log(1) << "balancer: no collections to balance" << endl;
        return;
    }

    //
    // 2. Get a list of all the shards that are participating in this balance round
    // along with any maximum allowed quotas and current utilization. We get the
    // latter by issuing db.serverStatus() (mem.mapped) to all shards.
    //
    // TODO: skip unresponsive shards and mark information as stale.
    //

    vector<Shard> allShards;
    Shard::getAllShards( allShards );
    if ( allShards.size() < 2 ) {
        log(1) << "balancer: can't balance without more active shards" << endl;
        return;
    }

    map< string, BSONObj > shardLimitsMap;
    for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) {
        const Shard& s = *it;
        ShardStatus status = s.getStatus();

        BSONObj limitsObj = BSON( ShardFields::maxSize( s.getMaxSize() ) <<
                                  ShardFields::currSize( status.mapped() ) <<
                                  ShardFields::draining( s.isDraining() ) );

        shardLimitsMap[ s.getName() ] = limitsObj;
    }

    //
    // 3. For each collection, check if the balancing policy recommends moving anything around.
    //

    for ( vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
        const string& ns = *it;

        map< string,vector<BSONObj> > shardToChunksMap;
        cursor = conn.query( ShardNS::chunk , QUERY( "ns" << ns ).sort( "min" ) );
        while ( cursor->more() ) {
            BSONObj chunk = cursor->next();
            vector<BSONObj>& chunks = shardToChunksMap[chunk["shard"].String()];
            chunks.push_back( chunk.getOwned() );
        }
        cursor.reset();

        if ( shardToChunksMap.empty() ) {
            log(1) << "balancer: skipping empty collection (" << ns << ")";
            continue;
        }

        for ( vector<Shard>::iterator i = allShards.begin(); i != allShards.end(); ++i ) {
            // this just makes sure there is an entry in shardToChunksMap for every shard
            Shard s = *i;
            shardToChunksMap[s.getName()].size();
        }

        CandidateChunk* p = _policy->balance( ns , shardLimitsMap , shardToChunksMap , _balancedLastTime );
        if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
    }
}
bool ShardStatus::operator<( const ShardStatus& other ) const {
    return dataSizeBytes() < other.dataSizeBytes();
}
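// Sketch (uses only APIs visible in this excerpt): the comparator above makes
// ShardStatus usable with standard algorithms, so the hand-written scan loops
// in Shard::pick() are equivalent to std::min_element over the statuses.
// `leastLoadedShard` is an assumed name; callers must pass a non-empty vector.
#include <algorithm>
#include <vector>

Shard leastLoadedShard( const std::vector<Shard>& all ) {
    std::vector<ShardStatus> statuses;
    for ( size_t i = 0; i < all.size(); i++ )
        statuses.push_back( all[i].getStatus() );

    // operator< orders by dataSizeBytes(), so the minimum element is the
    // status of the shard holding the least data
    return std::min_element( statuses.begin(), statuses.end() )->shard();
}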
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
    assert( candidateChunks );

    //
    // 1. Check whether there is any sharded collection to be balanced by querying
    // the ShardsNS::collections collection
    //

    auto_ptr<DBClientCursor> cursor = conn.query( ShardNS::collection , BSONObj() );
    vector< string > collections;
    while ( cursor->more() ) {
        BSONObj col = cursor->nextSafe();

        // sharded collections will have a shard "key".
        if ( ! col["key"].eoo() )
            collections.push_back( col["_id"].String() );
    }
    cursor.reset();

    if ( collections.empty() ) {
        LOG(1) << "no collections to balance" << endl;
        return;
    }

    //
    // 2. Get a list of all the shards that are participating in this balance round
    // along with any maximum allowed quotas and current utilization. We get the
    // latter by issuing db.serverStatus() (mem.mapped) to all shards.
    //
    // TODO: skip unresponsive shards and mark information as stale.
    //

    vector<Shard> allShards;
    Shard::getAllShards( allShards );
    if ( allShards.size() < 2 ) {
        LOG(1) << "can't balance without more active shards" << endl;
        return;
    }

    map< string, BSONObj > shardLimitsMap;
    for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) {
        const Shard& s = *it;
        ShardStatus status = s.getStatus();

        BSONObj limitsObj = BSON( ShardFields::maxSize( s.getMaxSize() ) <<
                                  LimitsFields::currSize( status.mapped() ) <<
                                  ShardFields::draining( s.isDraining() ) <<
                                  LimitsFields::hasOpsQueued( status.hasOpsQueued() ) );

        shardLimitsMap[ s.getName() ] = limitsObj;
    }

    //
    // 3. For each collection, check if the balancing policy recommends moving anything around.
    //

    for ( vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
        const string& ns = *it;

        map< string,vector<BSONObj> > shardToChunksMap;
        cursor = conn.query( ShardNS::chunk , QUERY( "ns" << ns ).sort( "min" ) );
        while ( cursor->more() ) {
            BSONObj chunk = cursor->nextSafe();
            if ( chunk["jumbo"].trueValue() )
                continue;
            vector<BSONObj>& chunks = shardToChunksMap[chunk["shard"].String()];
            chunks.push_back( chunk.getOwned() );
        }
        cursor.reset();

        if ( shardToChunksMap.empty() ) {
            LOG(1) << "skipping empty collection (" << ns << ")";
            continue;
        }

        for ( vector<Shard>::iterator i = allShards.begin(); i != allShards.end(); ++i ) {
            // this just makes sure there is an entry in shardToChunksMap for every shard
            Shard s = *i;
            shardToChunksMap[s.getName()].size();
        }

        CandidateChunk* p = _policy->balance( ns , shardLimitsMap , shardToChunksMap , _balancedLastTime );
        if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
    }
}
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
    verify( candidateChunks );

    //
    // 1. Check whether there is any sharded collection to be balanced by querying
    // the ShardsNS::collections collection
    //

    auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj());
    vector< string > collections;
    while ( cursor->more() ) {
        BSONObj col = cursor->nextSafe();

        // sharded collections will have a shard "key".
        if ( ! col[CollectionType::keyPattern()].eoo() &&
             ! col[CollectionType::noBalance()].trueValue() ){
            collections.push_back( col[CollectionType::ns()].String() );
        }
        else if( col[CollectionType::noBalance()].trueValue() ){
            LOG(1) << "not balancing collection " << col[CollectionType::ns()].String()
                   << ", explicitly disabled" << endl;
        }
    }
    cursor.reset();

    if ( collections.empty() ) {
        LOG(1) << "no collections to balance" << endl;
        return;
    }

    //
    // 2. Get a list of all the shards that are participating in this balance round
    // along with any maximum allowed quotas and current utilization. We get the
    // latter by issuing db.serverStatus() (mem.mapped) to all shards.
    //
    // TODO: skip unresponsive shards and mark information as stale.
    //

    vector<Shard> allShards;
    Shard::getAllShards( allShards );
    if ( allShards.size() < 2 ) {
        LOG(1) << "can't balance without more active shards" << endl;
        return;
    }

    ShardInfoMap shardInfo;
    for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) {
        const Shard& s = *it;
        ShardStatus status = s.getStatus();
        shardInfo[ s.getName() ] = ShardInfo( s.getMaxSize(),
                                              status.mapped(),
                                              s.isDraining(),
                                              status.hasOpsQueued(),
                                              s.tags() );
    }

    //
    // 3. For each collection, check if the balancing policy recommends moving anything around.
    //

    for ( vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
        const string& ns = *it;

        map< string,vector<BSONObj> > shardToChunksMap;
        cursor = conn.query(ChunkType::ConfigNS,
                            QUERY(ChunkType::ns(ns)).sort(ChunkType::min()));

        set<BSONObj> allChunkMinimums;

        while ( cursor->more() ) {
            BSONObj chunk = cursor->nextSafe().getOwned();
            vector<BSONObj>& chunks = shardToChunksMap[chunk[ChunkType::shard()].String()];
            allChunkMinimums.insert( chunk[ChunkType::min()].Obj() );
            chunks.push_back( chunk );
        }
        cursor.reset();

        if ( shardToChunksMap.empty() ) {
            LOG(1) << "skipping empty collection (" << ns << ")";
            continue;
        }

        for ( vector<Shard>::iterator i = allShards.begin(); i != allShards.end(); ++i ) {
            // this just makes sure there is an entry in shardToChunksMap for every shard
            Shard s = *i;
            shardToChunksMap[s.getName()].size();
        }

        DistributionStatus status( shardInfo, shardToChunksMap );

        // load tags
        conn.ensureIndex(TagsType::ConfigNS,
                         BSON(TagsType::ns() << 1 << TagsType::min() << 1),
                         true);

        cursor = conn.query(TagsType::ConfigNS,
                            QUERY(TagsType::ns(ns)).sort(TagsType::min()));

        vector<TagRange> ranges;

        while ( cursor->more() ) {
            BSONObj tag = cursor->nextSafe();
            TagRange tr(tag[TagsType::min()].Obj().getOwned(),
                        tag[TagsType::max()].Obj().getOwned(),
                        tag[TagsType::tag()].String());
            ranges.push_back(tr);
            uassert(16356,
                    str::stream() << "tag ranges not valid for: " << ns,
                    status.addTagRange(tr) );
        }
        cursor.reset();

        DBConfigPtr cfg = grid.getDBConfig( ns );
        verify( cfg );
        ChunkManagerPtr cm = cfg->getChunkManager( ns );
        verify( cm );

        // loop through tags to make sure no chunk spans tags; splits on tag min for all chunks
        bool didAnySplits = false;
        for ( unsigned i = 0; i < ranges.size(); i++ ) {
            BSONObj min = ranges[i].min;

            min = cm->getShardKey().extendRangeBound( min, false );

            if ( allChunkMinimums.count( min ) > 0 )
                continue;

            didAnySplits = true;

            log() << "ns: " << ns << " need to split on "
                  << min << " because there is a range there" << endl;

            ChunkPtr c = cm->findIntersectingChunk( min );

            vector<BSONObj> splitPoints;
            splitPoints.push_back( min );

            BSONObj res;
            if ( !c->multiSplit( splitPoints, res ) ) {
                error() << "split failed: " << res << endl;
            }
            else {
                LOG(1) << "split worked: " << res << endl;
            }
            break;
        }

        if ( didAnySplits ) {
            // state change, just wait till next round
            continue;
        }

        CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime );
        if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
    }
}
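// Sketch of a caller (assumption: modeled on how a balancer main loop might
// drive one round; _moveChunks and its exact signature are assumed here, not
// shown in this excerpt):
vector<CandidateChunkPtr> candidateChunks;
_doBalanceRound( conn, &candidateChunks );

if ( candidateChunks.empty() ) {
    LOG(1) << "no need to move any chunk" << endl;
    _balancedLastTime = 0;
}
else {
    // apply the policy's recommendations; remember how many chunks moved so
    // the next round can factor in recent activity
    _balancedLastTime = _moveChunks( &candidateChunks );
}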