Status ChunkManagerTargeter::targetInsert( const BSONObj& doc, ShardEndpoint** endpoint ) const {

    if ( !_primary && !_manager ) {
        return Status( ErrorCodes::NamespaceNotFound,
                       str::stream() << "could not target insert in collection "
                                     << getNS().ns() << "; no metadata found" );
    }

    if ( _primary ) {
        *endpoint = new ShardEndpoint( _primary->getName(), ChunkVersion::UNSHARDED() );
    }
    else {

        //
        // Sharded collections have the following requirements for targeting:
        //
        // Inserts must contain the exact shard key.
        //

        if ( !_manager->hasShardKey( doc ) ) {
            return Status( ErrorCodes::ShardKeyNotFound,
                           stream() << "document " << doc
                                    << " does not contain shard key for pattern "
                                    << _manager->getShardKey().key() );
        }

        ChunkPtr chunk = _manager->findChunkForDoc( doc );
        *endpoint = new ShardEndpoint( chunk->getShard().getName(),
                                       _manager->getVersion( chunk->getShard() ) );

        // Track autosplit stats for sharded collections
        _stats->chunkSizeDelta[chunk->getMin()] += doc.objsize();
    }

    return Status::OK();
}
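A minimal caller sketch, assuming the out-parameter contract above; the `targeter` and `doc` variables and the `scoped_ptr` ownership are illustrative assumptions, not code from the source.

// Hypothetical usage sketch (not from the source): target a single insert and
// take ownership of the heap-allocated endpoint returned via the out-parameter.
ShardEndpoint* endpoint = NULL;
Status status = targeter.targetInsert( doc, &endpoint );  // 'targeter' and 'doc' assumed to exist
if ( !status.isOK() ) {
    // e.g. NamespaceNotFound (no metadata) or ShardKeyNotFound (doc lacks the shard key)
    return status;
}
boost::scoped_ptr<ShardEndpoint> owned( endpoint );
// ... dispatch the insert to the shard and version identified by *owned ...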
void DistributionStatus::populateShardToChunksMap(const ShardStatisticsVector& allShards,
                                                  const ChunkManager& chunkMgr,
                                                  ShardToChunksMap* shardToChunksMap) {
    // Makes sure there is an entry in shardToChunksMap for every shard.
    for (const auto& stat : allShards) {
        (*shardToChunksMap)[stat.shardId];
    }

    const ChunkMap& chunkMap = chunkMgr.getChunkMap();
    for (ChunkMap::const_iterator it = chunkMap.begin(); it != chunkMap.end(); ++it) {
        const ChunkPtr chunkPtr = it->second;

        ChunkType chunk;
        chunk.setNS(chunkMgr.getns());
        chunk.setMin(chunkPtr->getMin().getOwned());
        chunk.setMax(chunkPtr->getMax().getOwned());
        chunk.setJumbo(chunkPtr->isJumbo());  // TODO: is this reliable?

        const string shardName(chunkPtr->getShardId());
        chunk.setShard(shardName);

        (*shardToChunksMap)[shardName].push_back(chunk);
    }
}
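A minimal sketch of the container shape this function assumes and how the populated map might be consumed; the typedef and the `allShards`/`chunkMgr` variables are illustrative assumptions, not the source's own definitions.

// Assumed shape (illustrative only): the map is keyed by shard name and holds the
// chunks currently owned by that shard; pre-seeding every shard lets callers see
// shards that own zero chunks.
typedef std::map<std::string, std::vector<ChunkType> > ShardToChunksMap;  // assumed

ShardToChunksMap shardToChunks;
DistributionStatus::populateShardToChunksMap( allShards, chunkMgr, &shardToChunks );
for ( ShardToChunksMap::const_iterator it = shardToChunks.begin();
      it != shardToChunks.end(); ++it ) {
    log() << "shard " << it->first << " owns " << it->second.size() << " chunks";
}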
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
    if ( ! okForConfigChanges( errmsg ) )
        return false;

    ShardConnection::sync();

    string ns = cmdObj.firstElement().valuestrsafe();
    if ( ns.size() == 0 ) {
        errmsg = "no ns";
        return false;
    }

    DBConfigPtr config = grid.getDBConfig( ns );
    if ( ! config->isSharded( ns ) ) {
        config->reload();
        if ( ! config->isSharded( ns ) ) {
            errmsg = "ns not sharded. have to shard before can split";
            return false;
        }
    }

    BSONObj find = cmdObj.getObjectField( "find" );
    if ( find.isEmpty() ) {
        find = cmdObj.getObjectField( "middle" );
        if ( find.isEmpty() ) {
            errmsg = "need to specify find or middle";
            return false;
        }
    }

    ChunkManagerPtr info = config->getChunkManager( ns );
    ChunkPtr chunk = info->findChunk( find );
    BSONObj middle = cmdObj.getObjectField( "middle" );

    assert( chunk.get() );
    log() << "splitting: " << ns << " shard: " << chunk << endl;

    BSONObj res;
    bool worked;
    if ( middle.isEmpty() ) {
        BSONObj ret = chunk->singleSplit( true /* force a split even if not enough data */ , res );
        worked = !ret.isEmpty();
    }
    else {
        // sanity check if the key provided is a valid split point
        if ( ( middle == chunk->getMin() ) || ( middle == chunk->getMax() ) ) {
            errmsg = "cannot split on initial or final chunk's key";
            return false;
        }

        if (!fieldsMatch(middle, info->getShardKey().key())){
            errmsg = "middle has different fields (or different order) than shard key";
            return false;
        }

        vector<BSONObj> splitPoints;
        splitPoints.push_back( middle );
        worked = chunk->multiSplit( splitPoints , res );
    }

    if ( !worked ) {
        errmsg = "split failed";
        result.append( "cause" , res );
        return false;
    }

    config->getChunkManager( ns , true );
    return true;
}
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ,
                           bool secondaryThrottle,
                           bool waitForDelete ) {
    int movedCount = 0;

    for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin();
          it != candidateChunks->end(); ++it ) {

        const CandidateChunk& chunkInfo = *it->get();

        DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns );
        verify( cfg );

        ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns );
        verify( cm );

        ChunkPtr c = cm->findIntersectingChunk( chunkInfo.chunk.min );
        if ( c->getMin().woCompare( chunkInfo.chunk.min ) ||
             c->getMax().woCompare( chunkInfo.chunk.max ) ) {
            // likely a split happened somewhere
            cm = cfg->getChunkManager( chunkInfo.ns , true /* reload */);
            verify( cm );

            c = cm->findIntersectingChunk( chunkInfo.chunk.min );
            if ( c->getMin().woCompare( chunkInfo.chunk.min ) ||
                 c->getMax().woCompare( chunkInfo.chunk.max ) ) {
                log() << "chunk mismatch after reload, ignoring will retry issue "
                      << chunkInfo.chunk.toString() << endl;
                continue;
            }
        }

        BSONObj res;
        if ( c->moveAndCommit( Shard::make( chunkInfo.to ) ,
                               Chunk::MaxChunkSize ,
                               secondaryThrottle ,
                               waitForDelete,
                               res ) ) {
            movedCount++;
            continue;
        }

        // the move requires acquiring the collection metadata's lock, which can fail
        log() << "balancer move failed: " << res << " from: " << chunkInfo.from
              << " to: " << chunkInfo.to << " chunk: " << chunkInfo.chunk << endl;

        if ( res["chunkTooBig"].trueValue() ) {
            // reload just to be safe
            cm = cfg->getChunkManager( chunkInfo.ns );
            verify( cm );
            c = cm->findIntersectingChunk( chunkInfo.chunk.min );

            log() << "forcing a split because migrate failed for size reasons" << endl;

            res = BSONObj();
            c->singleSplit( true , res );
            log() << "forced split results: " << res << endl;

            if ( ! res["ok"].trueValue() ) {
                log() << "marking chunk as jumbo: " << c->toString() << endl;
                c->markAsJumbo();
                // we increment moveCount so we do another round right away
                movedCount++;
            }
        }
    }

    return movedCount;
}
int Balancer::_moveChunks(const vector<CandidateChunkPtr>* candidateChunks,
                          bool secondaryThrottle,
                          bool waitForDelete) {
    int movedCount = 0;

    for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin();
          it != candidateChunks->end(); ++it ) {

        const CandidateChunk& chunkInfo = *it->get();

        // Changes to metadata, borked metadata, and connectivity problems should cause us to
        // abort this chunk move, but shouldn't cause us to abort the entire round of chunks.
        // TODO: Handle all these things more cleanly, since they're expected problems
        try {

            DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns );
            verify( cfg );

            // NOTE: We purposely do not reload metadata here, since _doBalanceRound already
            // tried to do so once.
            ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns );
            verify( cm );

            ChunkPtr c = cm->findIntersectingChunk( chunkInfo.chunk.min );
            if ( c->getMin().woCompare( chunkInfo.chunk.min ) ||
                 c->getMax().woCompare( chunkInfo.chunk.max ) ) {
                // likely a split happened somewhere
                cm = cfg->getChunkManager( chunkInfo.ns , true /* reload */);
                verify( cm );

                c = cm->findIntersectingChunk( chunkInfo.chunk.min );
                if ( c->getMin().woCompare( chunkInfo.chunk.min ) ||
                     c->getMax().woCompare( chunkInfo.chunk.max ) ) {
                    log() << "chunk mismatch after reload, ignoring will retry issue "
                          << chunkInfo.chunk.toString() << endl;
                    continue;
                }
            }

            BSONObj res;
            if (c->moveAndCommit(Shard::make(chunkInfo.to),
                                 Chunk::MaxChunkSize,
                                 secondaryThrottle,
                                 waitForDelete,
                                 0, /* maxTimeMS */
                                 res)) {
                movedCount++;
                continue;
            }

            // the move requires acquiring the collection metadata's lock, which can fail
            log() << "balancer move failed: " << res << " from: " << chunkInfo.from
                  << " to: " << chunkInfo.to << " chunk: " << chunkInfo.chunk << endl;

            if ( res["chunkTooBig"].trueValue() ) {
                // reload just to be safe
                cm = cfg->getChunkManager( chunkInfo.ns );
                verify( cm );
                c = cm->findIntersectingChunk( chunkInfo.chunk.min );

                log() << "forcing a split because migrate failed for size reasons" << endl;

                res = BSONObj();
                c->singleSplit( true , res );
                log() << "forced split results: " << res << endl;

                if ( ! res["ok"].trueValue() ) {
                    log() << "marking chunk as jumbo: " << c->toString() << endl;
                    c->markAsJumbo();
                    // we increment moveCount so we do another round right away
                    movedCount++;
                }
            }
        }
        catch( const DBException& ex ) {
            warning() << "could not move chunk " << chunkInfo.chunk.toString()
                      << ", continuing balancing round" << causedBy( ex ) << endl;
        }
    }

    return movedCount;
}
Status ChunkManagerTargeter::targetUpdate( const BatchedUpdateDocument& updateDoc,
                                           vector<ShardEndpoint*>* endpoints ) const {

    //
    // Update targeting may use either the query or the update.  This is to support save-style
    // updates, of the form:
    //
    // coll.update({ _id : xxx }, { _id : xxx, shardKey : 1, foo : bar }, { upsert : true })
    //
    // Because drivers do not know the shard key, they can't pull the shard key automatically
    // into the query doc, and to correctly support upsert we must target a single shard.
    //
    // The rule is simple - If the update is replacement style (no '$set'), we target using the
    // update.  If the update is op style (i.e. uses '$set'-style operators), we target using
    // the query.
    //

    BSONObj query = updateDoc.getQuery();
    BSONObj updateExpr = updateDoc.getUpdateExpr();

    UpdateType updateType = getUpdateExprType( updateDoc.getUpdateExpr() );

    if ( updateType == UpdateType_Unknown ) {
        return Status( ErrorCodes::UnsupportedFormat,
                       stream() << "update document " << updateExpr
                                << " has mixed $operator and non-$operator style fields" );
    }

    BSONObj targetedDoc = updateType == UpdateType_OpStyle ? query : updateExpr;

    bool exactShardKeyQuery = false;

    if ( _manager ) {

        //
        // Sharded collections have the following further requirements for targeting:
        //
        // Upserts must be targeted exactly by shard key.
        // Non-multi updates must be targeted exactly by shard key *or* exact _id.
        //

        exactShardKeyQuery = _manager->hasTargetableShardKey(targetedDoc);

        if ( updateDoc.getUpsert() && !exactShardKeyQuery ) {
            return Status( ErrorCodes::ShardKeyNotFound,
                           stream() << "upsert " << updateDoc.toBSON()
                                    << " does not contain shard key for pattern "
                                    << _manager->getShardKey().key() );
        }

        bool exactIdQuery = isExactIdQuery( updateDoc.getQuery() );

        if ( !updateDoc.getMulti() && !exactShardKeyQuery && !exactIdQuery ) {
            return Status( ErrorCodes::ShardKeyNotFound,
                           stream() << "update " << updateDoc.toBSON()
                                    << " does not contain _id or shard key for pattern "
                                    << _manager->getShardKey().key() );
        }

        // Track autosplit stats for sharded collections
        // Note: this is only best effort accounting and is not accurate.
        if ( exactShardKeyQuery ) {
            ChunkPtr chunk = _manager->findChunkForDoc(targetedDoc);
            _stats->chunkSizeDelta[chunk->getMin()] +=
                ( query.objsize() + updateExpr.objsize() );
        }
    }

    Status result = Status::OK();
    if (exactShardKeyQuery) {
        // We can't rely on our query targeting to be exact
        ShardEndpoint* endpoint = NULL;
        result = targetShardKey(targetedDoc, &endpoint);
        endpoints->push_back(endpoint);

        invariant(result.isOK());
        invariant(NULL != endpoint);
    }
    else {
        result = targetQuery(targetedDoc, endpoints);
    }

    return result;
}
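An illustrative note on the targeting rule in the comment above, assuming a shard key pattern of { shardKey : 1 }; the collection name and documents are made up for the example.

// Illustrative only: which document drives targeting under the rule above.
//
//   Replacement-style update (no $-operators) -> targeted by the update document:
//     coll.update({ _id : 1 }, { _id : 1, shardKey : 5, foo : "bar" }, { upsert : true })
//
//   Op-style update ($set etc.) -> targeted by the query document:
//     coll.update({ shardKey : 5 }, { $set : { foo : "bar" } })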
int Balancer::_moveChunks(OperationContext* txn,
                          const vector<MigrateInfo>& candidateChunks,
                          const MigrationSecondaryThrottleOptions& secondaryThrottle,
                          bool waitForDelete) {
    int movedCount = 0;

    for (const auto& migrateInfo : candidateChunks) {
        // If the balancer was disabled since we started this round, don't start new chunks
        // moves.
        const auto balSettingsResult =
            grid.catalogManager(txn)->getGlobalSettings(txn, SettingsType::BalancerDocKey);

        const bool isBalSettingsAbsent =
            balSettingsResult.getStatus() == ErrorCodes::NoMatchingDocument;

        if (!balSettingsResult.isOK() && !isBalSettingsAbsent) {
            warning() << balSettingsResult.getStatus();
            return movedCount;
        }

        const SettingsType& balancerConfig =
            isBalSettingsAbsent ? SettingsType{} : balSettingsResult.getValue();

        if ((!isBalSettingsAbsent && !Chunk::shouldBalance(balancerConfig)) ||
            MONGO_FAIL_POINT(skipBalanceRound)) {
            LOG(1) << "Stopping balancing round early as balancing was disabled";
            return movedCount;
        }

        // Changes to metadata, borked metadata, and connectivity problems between shards
        // should cause us to abort this chunk move, but shouldn't cause us to abort the entire
        // round of chunks.
        //
        // TODO(spencer): We probably *should* abort the whole round on issues communicating
        // with the config servers, but its impossible to distinguish those types of failures
        // at the moment.
        //
        // TODO: Handle all these things more cleanly, since they're expected problems

        const NamespaceString nss(migrateInfo.ns);

        try {
            shared_ptr<DBConfig> cfg =
                uassertStatusOK(grid.catalogCache()->getDatabase(txn, nss.db().toString()));

            // NOTE: We purposely do not reload metadata here, since _getCandidateChunks already
            // tried to do so once
            shared_ptr<ChunkManager> cm = cfg->getChunkManager(txn, migrateInfo.ns);
            uassert(28628,
                    str::stream()
                        << "Collection " << migrateInfo.ns
                        << " was deleted while balancing was active. Aborting balancing round.",
                    cm);

            ChunkPtr c = cm->findIntersectingChunk(txn, migrateInfo.chunk.min);

            if (c->getMin().woCompare(migrateInfo.chunk.min) ||
                c->getMax().woCompare(migrateInfo.chunk.max)) {
                // Likely a split happened somewhere, so force reload the chunk manager
                cm = cfg->getChunkManager(txn, migrateInfo.ns, true);
                invariant(cm);

                c = cm->findIntersectingChunk(txn, migrateInfo.chunk.min);

                if (c->getMin().woCompare(migrateInfo.chunk.min) ||
                    c->getMax().woCompare(migrateInfo.chunk.max)) {
                    log() << "chunk mismatch after reload, ignoring will retry issue "
                          << migrateInfo.chunk.toString();
                    continue;
                }
            }

            BSONObj res;
            if (c->moveAndCommit(txn,
                                 migrateInfo.to,
                                 Chunk::MaxChunkSize,
                                 secondaryThrottle,
                                 waitForDelete,
                                 0, /* maxTimeMS */
                                 res)) {
                movedCount++;
                continue;
            }

            // The move requires acquiring the collection metadata's lock, which can fail.
            log() << "balancer move failed: " << res << " from: " << migrateInfo.from
                  << " to: " << migrateInfo.to << " chunk: " << migrateInfo.chunk;

            Status moveStatus = getStatusFromCommandResult(res);

            if (moveStatus == ErrorCodes::ChunkTooBig || res["chunkTooBig"].trueValue()) {
                // Reload just to be safe
                cm = cfg->getChunkManager(txn, migrateInfo.ns);
                invariant(cm);
                c = cm->findIntersectingChunk(txn, migrateInfo.chunk.min);

                log() << "performing a split because migrate failed for size reasons";

                Status status = c->split(txn, Chunk::normal, NULL, NULL);
                log() << "split results: " << status;

                if (!status.isOK()) {
                    log() << "marking chunk as jumbo: " << c->toString();

                    c->markAsJumbo(txn);

                    // We increment moveCount so we do another round right away
                    movedCount++;
                }
            }
        } catch (const DBException& ex) {
            warning() << "could not move chunk " << migrateInfo.chunk.toString()
                      << ", continuing balancing round" << causedBy(ex);
        }
    }

    return movedCount;
}
int Balancer::_moveChunks(const vector<CandidateChunkPtr>* candidateChunks,
                          const WriteConcernOptions* writeConcern,
                          bool waitForDelete) {
    int movedCount = 0;

    for (vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin();
         it != candidateChunks->end();
         ++it) {
        // If the balancer was disabled since we started this round, don't start new
        // chunks moves.
        SettingsType balancerConfig;
        std::string errMsg;

        if (!grid.getBalancerSettings(&balancerConfig, &errMsg)) {
            warning() << errMsg;
            // No point in continuing the round if the config servers are unreachable.
            return movedCount;
        }

        if ((balancerConfig.isKeySet() &&  // balancer config doc exists
             !grid.shouldBalance(balancerConfig)) ||
            MONGO_FAIL_POINT(skipBalanceRound)) {
            LOG(1) << "Stopping balancing round early as balancing was disabled";
            return movedCount;
        }

        // Changes to metadata, borked metadata, and connectivity problems between shards should
        // cause us to abort this chunk move, but shouldn't cause us to abort the entire round
        // of chunks.
        // TODO(spencer): We probably *should* abort the whole round on issues communicating
        // with the config servers, but its impossible to distinguish those types of failures
        // at the moment.
        // TODO: Handle all these things more cleanly, since they're expected problems

        const CandidateChunk& chunkInfo = *it->get();
        try {
            DBConfigPtr cfg = grid.getDBConfig(chunkInfo.ns);
            verify(cfg);

            // NOTE: We purposely do not reload metadata here, since _doBalanceRound already
            // tried to do so once.
            ChunkManagerPtr cm = cfg->getChunkManager(chunkInfo.ns);
            verify(cm);

            ChunkPtr c = cm->findIntersectingChunk(chunkInfo.chunk.min);
            if (c->getMin().woCompare(chunkInfo.chunk.min) ||
                c->getMax().woCompare(chunkInfo.chunk.max)) {
                // likely a split happened somewhere
                cm = cfg->getChunkManager(chunkInfo.ns, true /* reload */);
                verify(cm);

                c = cm->findIntersectingChunk(chunkInfo.chunk.min);
                if (c->getMin().woCompare(chunkInfo.chunk.min) ||
                    c->getMax().woCompare(chunkInfo.chunk.max)) {
                    log() << "chunk mismatch after reload, ignoring will retry issue "
                          << chunkInfo.chunk.toString() << endl;
                    continue;
                }
            }

            BSONObj res;
            if (c->moveAndCommit(Shard::make(chunkInfo.to),
                                 Chunk::MaxChunkSize,
                                 writeConcern,
                                 waitForDelete,
                                 0, /* maxTimeMS */
                                 res)) {
                movedCount++;
                continue;
            }

            // the move requires acquiring the collection metadata's lock, which can fail
            log() << "balancer move failed: " << res << " from: " << chunkInfo.from
                  << " to: " << chunkInfo.to << " chunk: " << chunkInfo.chunk << endl;

            if (res["chunkTooBig"].trueValue()) {
                // reload just to be safe
                cm = cfg->getChunkManager(chunkInfo.ns);
                verify(cm);
                c = cm->findIntersectingChunk(chunkInfo.chunk.min);

                log() << "performing a split because migrate failed for size reasons";

                Status status = c->split(Chunk::normal, NULL, NULL);
                log() << "split results: " << status << endl;

                if (!status.isOK()) {
                    log() << "marking chunk as jumbo: " << c->toString() << endl;
                    c->markAsJumbo();

                    // we increment moveCount so we do another round right away
                    movedCount++;
                }
            }
        } catch (const DBException& ex) {
            warning() << "could not move chunk " << chunkInfo.chunk.toString()
                      << ", continuing balancing round" << causedBy(ex) << endl;
        }
    }

    return movedCount;
}