bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { ShardConnection::sync(); string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ) { errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ) { errmsg = "ns not sharded. have to shard before can split"; return false; } BSONObj find = cmdObj.getObjectField( "find" ); if ( find.isEmpty() ) { find = cmdObj.getObjectField( "middle" ); if ( find.isEmpty() ) { errmsg = "need to specify find or middle"; return false; } } ChunkManagerPtr info = config->getChunkManager( ns ); ChunkPtr chunk = info->findChunk( find ); BSONObj middle = cmdObj.getObjectField( "middle" ); assert( chunk.get() ); log() << "splitting: " << ns << " shard: " << chunk << endl; BSONObj res; ChunkPtr p; if ( middle.isEmpty() ) { p = chunk->singleSplit( true /* force a split even if not enough data */ , res ); } else { // sanity check if the key provided is a valid split point if ( ( middle == chunk->getMin() ) || ( middle == chunk->getMax() ) ) { errmsg = "cannot split on initial or final chunk's key"; return false; } vector<BSONObj> splitPoints; splitPoints.push_back( middle ); p = chunk->multiSplit( splitPoints , res ); } if ( p.get() == NULL ) { errmsg = "split failed"; result.append( "cause" , res ); return false; } return true; }
/**
 * Splits the chunk `old`, either at a point the chunk chooses or at `middle`.
 *
 * An empty `middle` triggers old->split(); otherwise `middle` is passed to
 * old->multiSplit() as the only split point. Whatever those calls return is
 * not inspected, and `result`, `errmsg` and `manager` are not used here.
 *
 * @param ns      namespace, used only in the log line
 * @param old     chunk to split; must be non-null (asserted)
 * @param middle  explicit split key, or an empty object
 * @return always true
 */
virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManagerPtr manager , ChunkPtr old , BSONObj middle ){
    assert( old.get() );
    log() << "splitting: " << ns << " shard: " << old << endl;

    // No explicit key: let the chunk pick its own split point.
    if ( middle.isEmpty() ) {
        old->split();
        return true;
    }

    // Explicit key: hand it over as a one-element list of split points.
    vector<BSONObj> points( 1 , middle );
    old->multiSplit( points );
    return true;
}
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) { verify( candidateChunks ); // // 1. Check whether there is any sharded collection to be balanced by querying // the ShardsNS::collections collection // auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj()); if ( NULL == cursor.get() ) { warning() << "could not query " << CollectionType::ConfigNS << " while trying to balance" << endl; return; } vector< string > collections; while ( cursor->more() ) { BSONObj col = cursor->nextSafe(); // sharded collections will have a shard "key". if ( ! col[CollectionType::keyPattern()].eoo() && ! col[CollectionType::noBalance()].trueValue() ){ collections.push_back( col[CollectionType::ns()].String() ); } else if( col[CollectionType::noBalance()].trueValue() ){ LOG(1) << "not balancing collection " << col[CollectionType::ns()].String() << ", explicitly disabled" << endl; } } cursor.reset(); if ( collections.empty() ) { LOG(1) << "no collections to balance" << endl; return; } // // 2. Get a list of all the shards that are participating in this balance round // along with any maximum allowed quotas and current utilization. We get the // latter by issuing db.serverStatus() (mem.mapped) to all shards. // // TODO: skip unresponsive shards and mark information as stale. // ShardInfoMap shardInfo; Status loadStatus = DistributionStatus::populateShardInfoMap(&shardInfo); if (!loadStatus.isOK()) { warning() << "failed to load shard metadata" << causedBy(loadStatus) << endl; return; } if (shardInfo.size() < 2) { LOG(1) << "can't balance without more active shards" << endl; return; } OCCASIONALLY warnOnMultiVersion( shardInfo ); // // 3. For each collection, check if the balancing policy recommends moving anything around. 
// for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) { const string& ns = *it; OwnedPointerMap<string, OwnedPointerVector<ChunkType> > shardToChunksMap; cursor = conn.query(ChunkType::ConfigNS, QUERY(ChunkType::ns(ns)).sort(ChunkType::min())); set<BSONObj> allChunkMinimums; while ( cursor->more() ) { BSONObj chunkDoc = cursor->nextSafe().getOwned(); auto_ptr<ChunkType> chunk(new ChunkType()); string errmsg; if (!chunk->parseBSON(chunkDoc, &errmsg)) { error() << "bad chunk format for " << chunkDoc << ": " << errmsg << endl; return; } allChunkMinimums.insert(chunk->getMin().getOwned()); OwnedPointerVector<ChunkType>*& chunkList = shardToChunksMap.mutableMap()[chunk->getShard()]; if (chunkList == NULL) { chunkList = new OwnedPointerVector<ChunkType>(); } chunkList->mutableVector().push_back(chunk.release()); } cursor.reset(); if (shardToChunksMap.map().empty()) { LOG(1) << "skipping empty collection (" << ns << ")"; continue; } for (ShardInfoMap::const_iterator i = shardInfo.begin(); i != shardInfo.end(); ++i) { // this just makes sure there is an entry in shardToChunksMap for every shard OwnedPointerVector<ChunkType>*& chunkList = shardToChunksMap.mutableMap()[i->first]; if (chunkList == NULL) { chunkList = new OwnedPointerVector<ChunkType>(); } } DistributionStatus status(shardInfo, shardToChunksMap.map()); // load tags Status result = clusterCreateIndex(TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::min() << 1), true, // unique WriteConcernOptions::AllConfigs, NULL); if ( !result.isOK() ) { warning() << "could not create index tags_1_min_1: " << result.reason() << endl; continue; } cursor = conn.query(TagsType::ConfigNS, QUERY(TagsType::ns(ns)).sort(TagsType::min())); vector<TagRange> ranges; while ( cursor->more() ) { BSONObj tag = cursor->nextSafe(); TagRange tr(tag[TagsType::min()].Obj().getOwned(), tag[TagsType::max()].Obj().getOwned(), tag[TagsType::tag()].String()); ranges.push_back(tr); uassert(16356, 
str::stream() << "tag ranges not valid for: " << ns, status.addTagRange(tr) ); } cursor.reset(); DBConfigPtr cfg = grid.getDBConfig( ns ); if ( !cfg ) { warning() << "could not load db config to balance " << ns << " collection" << endl; continue; } // This line reloads the chunk manager once if this process doesn't know the collection // is sharded yet. ChunkManagerPtr cm = cfg->getChunkManagerIfExists( ns, true ); if ( !cm ) { warning() << "could not load chunks to balance " << ns << " collection" << endl; continue; } // loop through tags to make sure no chunk spans tags; splits on tag min. for all chunks bool didAnySplits = false; for ( unsigned i = 0; i < ranges.size(); i++ ) { BSONObj min = ranges[i].min; min = cm->getShardKey().extendRangeBound( min, false ); if ( allChunkMinimums.count( min ) > 0 ) continue; didAnySplits = true; log() << "ns: " << ns << " need to split on " << min << " because there is a range there" << endl; ChunkPtr c = cm->findIntersectingChunk( min ); vector<BSONObj> splitPoints; splitPoints.push_back( min ); BSONObj res; if ( !c->multiSplit( splitPoints, res ) ) { error() << "split failed: " << res << endl; } else { LOG(1) << "split worked: " << res << endl; } break; } if ( didAnySplits ) { // state change, just wait till next round continue; } CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime ); if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) ); } }
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) { verify( candidateChunks ); // // 1. Check whether there is any sharded collection to be balanced by querying // the ShardsNS::collections collection // auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj()); vector< string > collections; while ( cursor->more() ) { BSONObj col = cursor->nextSafe(); // sharded collections will have a shard "key". if ( ! col[CollectionType::keyPattern()].eoo() && ! col[CollectionType::noBalance()].trueValue() ){ collections.push_back( col[CollectionType::ns()].String() ); } else if( col[CollectionType::noBalance()].trueValue() ){ LOG(1) << "not balancing collection " << col[CollectionType::ns()].String() << ", explicitly disabled" << endl; } } cursor.reset(); if ( collections.empty() ) { LOG(1) << "no collections to balance" << endl; return; } // // 2. Get a list of all the shards that are participating in this balance round // along with any maximum allowed quotas and current utilization. We get the // latter by issuing db.serverStatus() (mem.mapped) to all shards. // // TODO: skip unresponsive shards and mark information as stale. // vector<Shard> allShards; Shard::getAllShards( allShards ); if ( allShards.size() < 2) { LOG(1) << "can't balance without more active shards" << endl; return; } ShardInfoMap shardInfo; for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) { const Shard& s = *it; ShardStatus status = s.getStatus(); shardInfo[ s.getName() ] = ShardInfo( s.getMaxSize(), status.mapped(), s.isDraining(), status.hasOpsQueued(), s.tags() ); } // // 3. For each collection, check if the balancing policy recommends moving anything around. 
// for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) { const string& ns = *it; map< string,vector<BSONObj> > shardToChunksMap; cursor = conn.query(ChunkType::ConfigNS, QUERY(ChunkType::ns(ns)).sort(ChunkType::min())); set<BSONObj> allChunkMinimums; while ( cursor->more() ) { BSONObj chunk = cursor->nextSafe().getOwned(); vector<BSONObj>& chunks = shardToChunksMap[chunk[ChunkType::shard()].String()]; allChunkMinimums.insert( chunk[ChunkType::min()].Obj() ); chunks.push_back( chunk ); } cursor.reset(); if (shardToChunksMap.empty()) { LOG(1) << "skipping empty collection (" << ns << ")"; continue; } for ( vector<Shard>::iterator i=allShards.begin(); i!=allShards.end(); ++i ) { // this just makes sure there is an entry in shardToChunksMap for every shard Shard s = *i; shardToChunksMap[s.getName()].size(); } DistributionStatus status( shardInfo, shardToChunksMap ); // load tags conn.ensureIndex(TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::min() << 1), true); cursor = conn.query(TagsType::ConfigNS, QUERY(TagsType::ns(ns)).sort(TagsType::min())); vector<TagRange> ranges; while ( cursor->more() ) { BSONObj tag = cursor->nextSafe(); TagRange tr(tag[TagsType::min()].Obj().getOwned(), tag[TagsType::max()].Obj().getOwned(), tag[TagsType::tag()].String()); ranges.push_back(tr); uassert(16356, str::stream() << "tag ranges not valid for: " << ns, status.addTagRange(tr) ); } cursor.reset(); DBConfigPtr cfg = grid.getDBConfig( ns ); verify( cfg ); ChunkManagerPtr cm = cfg->getChunkManager( ns ); verify( cm ); // loop through tags to make sure no chunk spans tags; splits on tag min. 
for all chunks bool didAnySplits = false; for ( unsigned i = 0; i < ranges.size(); i++ ) { BSONObj min = ranges[i].min; min = cm->getShardKey().extendRangeBound( min, false ); if ( allChunkMinimums.count( min ) > 0 ) continue; didAnySplits = true; log() << "ns: " << ns << " need to split on " << min << " because there is a range there" << endl; ChunkPtr c = cm->findIntersectingChunk( min ); vector<BSONObj> splitPoints; splitPoints.push_back( min ); BSONObj res; if ( !c->multiSplit( splitPoints, res ) ) { error() << "split failed: " << res << endl; } else { LOG(1) << "split worked: " << res << endl; } break; } if ( didAnySplits ) { // state change, just wait till next round continue; } CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime ); if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) ); } }
/**
 * Performs one balancing pass and collects the migrations the policy suggests.
 *
 * Loads the collection list and per-shard information (at least two shards
 * are required), then for every collection that allows balancing: loads its
 * chunks and tag ranges, splits a chunk if a tag range minimum is not already
 * a chunk boundary (at most one split per collection per round, after which
 * the collection is skipped until the next round), and otherwise asks the
 * policy for a migration.
 *
 * A collection whose chunks, db config or chunk manager cannot be loaded is
 * skipped with a warning. Failure to load tags, or an invalid tag range,
 * raises via uassert.
 *
 * @param candidateChunks  output; one entry is appended per collection for
 *                         which the policy returns a migration. Must be non-null.
 */
void Balancer::_doBalanceRound(vector<shared_ptr<MigrateInfo>>* candidateChunks) {
    invariant(candidateChunks);

    vector<CollectionType> collections;
    Status collsStatus = grid.catalogManager()->getCollections(nullptr, &collections);
    if (!collsStatus.isOK()) {
        warning() << "Failed to retrieve the set of collections during balancing round "
                  << collsStatus;
        return;
    }

    if (collections.empty()) {
        LOG(1) << "no collections to balance";
        return;
    }

    // Get a list of all the shards that are participating in this balance round along with any
    // maximum allowed quotas and current utilization. We get the latter by issuing
    // db.serverStatus() (mem.mapped) to all shards.
    //
    // TODO: skip unresponsive shards and mark information as stale.
    ShardInfoMap shardInfo;
    Status loadStatus = DistributionStatus::populateShardInfoMap(&shardInfo);
    if (!loadStatus.isOK()) {
        warning() << "failed to load shard metadata" << causedBy(loadStatus);
        return;
    }

    if (shardInfo.size() < 2) {
        LOG(1) << "can't balance without more active shards";
        return;
    }

    OCCASIONALLY warnOnMultiVersion(shardInfo);

    // For each collection, check if the balancing policy recommends moving anything around.
    for (const auto& coll : collections) {
        // Skip collections for which balancing is disabled
        const NamespaceString& nss = coll.getNs();

        if (!coll.getAllowBalance()) {
            LOG(1) << "Not balancing collection " << nss << "; explicitly disabled.";
            continue;
        }

        // Do not let a failed chunk load pass for an empty collection.
        std::vector<ChunkType> allNsChunks;
        Status chunksStatus = grid.catalogManager()->getChunks(BSON(ChunkType::ns(nss.ns())),
                                                               BSON(ChunkType::min() << 1),
                                                               boost::none,  // all chunks
                                                               &allNsChunks);
        if (!chunksStatus.isOK()) {
            warning() << "failed to load chunks for ns " << nss.ns() << causedBy(chunksStatus);
            continue;
        }

        set<BSONObj> allChunkMinimums;
        map<string, vector<ChunkType>> shardToChunksMap;

        for (const ChunkType& chunk : allNsChunks) {
            allChunkMinimums.insert(chunk.getMin().getOwned());

            vector<ChunkType>& chunksList = shardToChunksMap[chunk.getShard()];
            chunksList.push_back(chunk);
        }

        if (shardToChunksMap.empty()) {
            LOG(1) << "skipping empty collection (" << nss.ns() << ")";
            continue;
        }

        for (const auto& shardEntry : shardInfo) {
            // This loop just makes sure there is an entry in shardToChunksMap for every shard
            shardToChunksMap[shardEntry.first];
        }

        DistributionStatus status(shardInfo, shardToChunksMap);

        // TODO: TagRange contains all the information from TagsType except for the namespace,
        //       so maybe the two can be merged at some point in order to avoid the
        //       transformation below.
        vector<TagRange> ranges;

        {
            vector<TagsType> collectionTags;
            uassertStatusOK(
                grid.catalogManager()->getTagsForCollection(nss.ns(), &collectionTags));
            for (const auto& tt : collectionTags) {
                ranges.push_back(
                    TagRange(tt.getMinKey().getOwned(), tt.getMaxKey().getOwned(), tt.getTag()));
                uassert(16356,
                        str::stream() << "tag ranges not valid for: " << nss.ns(),
                        status.addTagRange(ranges.back()));
            }
        }

        auto statusGetDb = grid.catalogCache()->getDatabase(nss.db().toString());
        if (!statusGetDb.isOK()) {
            warning() << "could not load db config to balance collection [" << nss.ns()
                      << "]: " << statusGetDb.getStatus();
            continue;
        }

        shared_ptr<DBConfig> cfg = statusGetDb.getValue();

        // This line reloads the chunk manager once if this process doesn't know the collection
        // is sharded yet.
        shared_ptr<ChunkManager> cm = cfg->getChunkManagerIfExists(nss.ns(), true);
        if (!cm) {
            warning() << "could not load chunks to balance " << nss.ns() << " collection";
            continue;
        }

        // Loop through tags to make sure no chunk spans tags. Split on tag min for all chunks.
        bool didAnySplits = false;

        for (const TagRange& range : ranges) {
            BSONObj min =
                cm->getShardKeyPattern().getKeyPattern().extendRangeBound(range.min, false);

            if (allChunkMinimums.count(min) > 0) {
                continue;
            }

            didAnySplits = true;

            log() << "nss: " << nss.ns() << " need to split on " << min
                  << " because there is a range there";

            ChunkPtr c = cm->findIntersectingChunk(min);

            vector<BSONObj> splitPoints;
            splitPoints.push_back(min);

            // Named splitStatus so it does not shadow the DistributionStatus `status` above.
            Status splitStatus = c->multiSplit(splitPoints, nullptr);
            if (!splitStatus.isOK()) {
                error() << "split failed: " << splitStatus;
            } else {
                LOG(1) << "split worked";
            }

            // Only one split attempt per collection per round.
            break;
        }

        if (didAnySplits) {
            // State change, just wait till next round
            continue;
        }

        shared_ptr<MigrateInfo> migrateInfo(
            _policy->balance(nss.ns(), status, _balancedLastTime));
        if (migrateInfo) {
            candidateChunks->push_back(migrateInfo);
        }
    }
}